Diffstat (limited to 'drivers/base')
65 files changed, 3524 insertions, 1562 deletions
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 064eb52ff7e2..1786d87b29e2 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -167,6 +167,12 @@ config PM_QOS_KUNIT_TEST depends on KUNIT=y default KUNIT_ALL_TESTS +config PM_RUNTIME_KUNIT_TEST + tristate "KUnit Tests for runtime PM" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on PM + default KUNIT_ALL_TESTS + config HMEM_REPORTING bool default n diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 7fb21768ca36..8074a10183dc 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ topology.o container.o property.o cacheinfo.o \ - swnode.o + swnode.o faux.o obj-$(CONFIG_AUXILIARY_BUS) += auxiliary.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-y += power/ diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index e18701676426..c99f2ab105e5 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -208,6 +208,10 @@ static int __init numa_register_nodes(void) { int nid; + /* Check the validity of the memblock/node mapping */ + if (!memblock_validate_numa_coverage(0)) + return -EINVAL; + /* Finally register nodes. */ for_each_node_mask(nid, numa_nodes_parsed) { unsigned long start_pfn, end_pfn; diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 3ebe77566788..84ec92bff642 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -11,6 +11,7 @@ #include <linux/cleanup.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpu_smt.h> #include <linux/device.h> #include <linux/of.h> #include <linux/slab.h> @@ -28,7 +29,7 @@ static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; -DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0; EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) @@ -153,14 +154,6 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, per_cpu(arch_freq_scale, i) = scale; } -DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale); - -void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) -{ - per_cpu(cpu_scale, cpu) = capacity; -} - DEFINE_PER_CPU(unsigned long, hw_pressure); /** @@ -206,53 +199,9 @@ void topology_update_hw_pressure(const struct cpumask *cpus, } EXPORT_SYMBOL_GPL(topology_update_hw_pressure); -static ssize_t cpu_capacity_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); -} - static void update_topology_flags_workfn(struct work_struct *work); static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); -static DEVICE_ATTR_RO(cpu_capacity); - -static int cpu_capacity_sysctl_add(unsigned int cpu) -{ - struct device *cpu_dev = get_cpu_device(cpu); - - if (!cpu_dev) - return -ENOENT; - - device_create_file(cpu_dev, &dev_attr_cpu_capacity); - - return 0; -} - -static int cpu_capacity_sysctl_remove(unsigned int cpu) -{ - struct device *cpu_dev = get_cpu_device(cpu); - - if (!cpu_dev) - return -ENOENT; - - device_remove_file(cpu_dev, &dev_attr_cpu_capacity); - - return 0; -} - 
-static int register_cpu_capacity_sysctl(void) -{ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity", - cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove); - - return 0; -} -subsys_initcall(register_cpu_capacity_sysctl); - static int update_topology; int topology_update_cpu_topology(void) @@ -293,13 +242,15 @@ void topology_normalize_cpu_scale(void) capacity_scale = 1; for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity_scale = max(capacity, capacity_scale); } pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); topology_set_cpu_scale(cpu, capacity); @@ -341,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) * frequency (by keeping the initial capacity_freq_ref value). */ cpu_clk = of_clk_get(cpu_node, 0); - if (!PTR_ERR_OR_ZERO(cpu_clk)) { + if (!IS_ERR_OR_NULL(cpu_clk)) { per_cpu(capacity_freq_ref, cpu) = clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); @@ -506,6 +457,10 @@ core_initcall(free_raw_capacity); #endif #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) + +/* Used to enable the SMT control */ +static unsigned int max_smt_thread_num = 1; + /* * This function returns the logic cpu number of the node. * There are basically three kinds of return values: @@ -565,6 +520,8 @@ static int __init parse_core(struct device_node *core, int package_id, i++; } while (1); + max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i); + cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { @@ -677,6 +634,17 @@ static int __init parse_socket(struct device_node *socket) if (!has_socket) ret = parse_cluster(socket, 0, -1, 0); + /* + * Reset max_smt_thread_num to 1 on failure: we need to tell the + * framework that SMT is not supported, but max_smt_thread_num may + * already have been set from the SMT thread number of the cores + * that were successfully parsed. + */ + if (ret) + max_smt_thread_num = 1; + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + return ret; } @@ -855,12 +823,106 @@ void remove_cpu_topology(unsigned int cpu) clear_cpu_topology(cpu); } +#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) +struct cpu_smt_info { + unsigned int thread_num; + int core_id; +}; + +static bool __init acpi_cpu_is_threaded(int cpu) +{ + int is_threaded = acpi_pptt_cpu_is_thread(cpu); + + /* + * If the PPTT doesn't have thread information, check for an + * architecture-specific fallback, if available. + */ + if (is_threaded < 0) + is_threaded = arch_cpu_is_threaded(); + + return !!is_threaded; +} + +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array.
+ */ __weak int __init parse_acpi_topology(void) { + unsigned int max_smt_thread_num = 1; + struct cpu_smt_info *entry; + struct xarray hetero_cpu; + unsigned long hetero_id; + int cpu, topology_id; + + if (acpi_disabled) + return 0; + + xa_init(&hetero_cpu); + + for_each_possible_cpu(cpu) { + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (acpi_cpu_is_threaded(cpu)) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + + /* + * In the PPTT, CPUs below a node with the 'identical + * implementation' flag have the same number of threads. + * Count the number of threads for only one CPU (i.e. + * one core_id) among those with the same hetero_id. + * See the comment of find_acpi_cpu_topology_hetero_id() + * for more details. + * + * One entry is created for each node having: + * - the 'identical implementation' flag + * - its parent not having the flag + */ + hetero_id = find_acpi_cpu_topology_hetero_id(cpu); + entry = xa_load(&hetero_cpu, hetero_id); + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + WARN_ON_ONCE(!entry); + + if (entry) { + entry->core_id = topology_id; + entry->thread_num = 1; + xa_store(&hetero_cpu, hetero_id, + entry, GFP_KERNEL); + } + } else if (entry->core_id == topology_id) { + entry->thread_num++; + } + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_cluster(cpu); + cpu_topology[cpu].cluster_id = topology_id; + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + } + + /* + * This is a short loop since the number of XArray elements is the + * number of heterogeneous CPU clusters. On a homogeneous system + * there's only one entry in the XArray. + */ + xa_for_each(&hetero_cpu, hetero_id, entry) { + max_smt_thread_num = max(max_smt_thread_num, entry->thread_num); + xa_erase(&hetero_cpu, hetero_id); + kfree(entry); + } + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + xa_destroy(&hetero_cpu); return 0; } -#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) void __init init_cpu_topology(void) { int cpu, ret; diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 7823888af4f6..04bdbff4dbe5 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -92,7 +92,7 @@ * Auxiliary devices are created and registered by a subsystem-level core * device that needs to break up its functionality into smaller fragments. One * way to extend the scope of an auxiliary_device is to encapsulate it within a - * domain- pecific structure defined by the parent device. This structure + * domain-specific structure defined by the parent device. This structure * contains the auxiliary_device and any associated shared data/callbacks * needed to establish the connection with the parent. * @@ -156,22 +156,33 @@ * }, * .ops = my_custom_ops, * }; + * + * Please note that such a custom ops approach is valid, but it is hard to implement + * correctly without per-device global locks to protect against auxiliary_drv removal + * during a call to those ops. In addition, this implementation lacks proper module + * dependencies, which causes load/unload races between the auxiliary parent and device + * modules.
+ * + * The easiest way to provide these ops reliably without needing a lock + * is to EXPORT_SYMBOL*() them and rely on the already existing module + * infrastructure for validity and correct dependency chains. */ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, const struct auxiliary_device *auxdev) { - for (; id->name[0]; id++) { - const char *p = strrchr(dev_name(&auxdev->dev), '.'); - int match_size; + const char *auxdev_name = dev_name(&auxdev->dev); + const char *p = strrchr(auxdev_name, '.'); + int match_size; - if (!p) - continue; - match_size = p - dev_name(&auxdev->dev); + if (!p) + return NULL; + match_size = p - auxdev_name; + for (; id->name[0]; id++) { /* use dev_name(&auxdev->dev) prefix before last '.' char to match to */ if (strlen(id->name) == match_size && - !strncmp(dev_name(&auxdev->dev), id->name, match_size)) + !strncmp(auxdev_name, id->name, match_size)) return id; } return NULL; @@ -207,17 +218,14 @@ static int auxiliary_bus_probe(struct device *dev) struct auxiliary_device *auxdev = to_auxiliary_dev(dev); int ret; - ret = dev_pm_domain_attach(dev, true); + ret = dev_pm_domain_attach(dev, PD_FLAG_ATTACH_POWER_ON | + PD_FLAG_DETACH_POWER_OFF); if (ret) { dev_warn(dev, "Failed to attach to PM Domain : %d\n", ret); return ret; } - ret = auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev)); - if (ret) - dev_pm_domain_detach(dev, true); - - return ret; + return auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev)); } static void auxiliary_bus_remove(struct device *dev) @@ -227,7 +235,6 @@ static void auxiliary_bus_remove(struct device *dev) if (auxdrv->remove) auxdrv->remove(auxdev); - dev_pm_domain_detach(dev, true); } static void auxiliary_bus_shutdown(struct device *dev) @@ -336,35 +343,6 @@ int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname) EXPORT_SYMBOL_GPL(__auxiliary_device_add); /** - * auxiliary_find_device - auxiliary device iterator for locating a particular device. - * @start: Device to begin with - * @data: Data to pass to match function - * @match: Callback function to check device - * - * This function returns a reference to a device that is 'found' - * for later use, as determined by the @match callback. - * - * The reference returned should be released with put_device(). - * - * The callback should return 0 if the device doesn't match and non-zero - * if it does. If the callback returns non-zero, this function will - * return to the caller and not iterate over any more devices. - */ -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - device_match_t match) -{ - struct device *dev; - - dev = bus_find_device(&auxiliary_bus_type, start, data, match); - if (!dev) - return NULL; - - return to_auxiliary_dev(dev); -} -EXPORT_SYMBOL_GPL(auxiliary_find_device); - -/** * __auxiliary_driver_register - register a driver for auxiliary bus devices * @auxdrv: auxiliary_driver structure * @owner: owning module/driver @@ -414,6 +392,116 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv) } EXPORT_SYMBOL_GPL(auxiliary_driver_unregister); +static void auxiliary_device_release(struct device *dev) +{ + struct auxiliary_device *auxdev = to_auxiliary_dev(dev); + + of_node_put(dev->of_node); + kfree(auxdev); } + +/** + * auxiliary_device_create - create a device on the auxiliary bus + * @dev: parent device + * @modname: module name used to create the auxiliary driver name.
+ * @devname: auxiliary bus device name + * @platform_data: auxiliary bus device platform data + * @id: auxiliary bus device id + * + * Helper to create an auxiliary bus device. + * The device created matches driver 'modname.devname' on the auxiliary bus. + */ +struct auxiliary_device *auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id) +{ + struct auxiliary_device *auxdev; + int ret; + + auxdev = kzalloc(sizeof(*auxdev), GFP_KERNEL); + if (!auxdev) + return NULL; + + auxdev->id = id; + auxdev->name = devname; + auxdev->dev.parent = dev; + auxdev->dev.platform_data = platform_data; + auxdev->dev.release = auxiliary_device_release; + device_set_of_node_from_dev(&auxdev->dev, dev); + + ret = auxiliary_device_init(auxdev); + if (ret) { + of_node_put(auxdev->dev.of_node); + kfree(auxdev); + return NULL; + } + + ret = __auxiliary_device_add(auxdev, modname); + if (ret) { + /* + * It may look odd but auxdev should not be freed here. + * auxiliary_device_uninit() calls device_put() which call + * the device release function, freeing auxdev. + */ + auxiliary_device_uninit(auxdev); + return NULL; + } + + return auxdev; +} +EXPORT_SYMBOL_GPL(auxiliary_device_create); + +/** + * auxiliary_device_destroy - remove an auxiliary device + * @auxdev: pointer to the auxdev to be removed + * + * Helper to remove an auxiliary device created with + * auxiliary_device_create() + */ +void auxiliary_device_destroy(void *auxdev) +{ + struct auxiliary_device *_auxdev = auxdev; + + auxiliary_device_delete(_auxdev); + auxiliary_device_uninit(_auxdev); +} +EXPORT_SYMBOL_GPL(auxiliary_device_destroy); + +/** + * __devm_auxiliary_device_create - create a managed device on the auxiliary bus + * @dev: parent device + * @modname: module name used to create the auxiliary driver name. + * @devname: auxiliary bus device name + * @platform_data: auxiliary bus device platform data + * @id: auxiliary bus device id + * + * Device managed helper to create an auxiliary bus device. + * The device created matches driver 'modname.devname' on the auxiliary bus. + */ +struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id) +{ + struct auxiliary_device *auxdev; + int ret; + + auxdev = auxiliary_device_create(dev, modname, devname, platform_data, id); + if (!auxdev) + return NULL; + + ret = devm_add_action_or_reset(dev, auxiliary_device_destroy, + auxdev); + if (ret) + return NULL; + + return auxdev; +} +EXPORT_SYMBOL_GPL(__devm_auxiliary_device_create); + void __init auxiliary_bus_init(void) { WARN_ON(bus_register(&auxiliary_bus_type)); diff --git a/drivers/base/base.h b/drivers/base/base.h index 8cf04a557bdb..430cbefbc97f 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); } +struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { @@ -84,6 +85,18 @@ struct driver_private { }; #define to_driver(obj) container_of(obj, struct driver_private, kobj) +#ifdef CONFIG_RUST +/** + * struct driver_type - Representation of a Rust driver type. + */ +struct driver_type { + /** + * @id: Representation of core::any::TypeId. + */ + u8 id[16]; +} __packed; +#endif + /** * struct device_private - structure to hold the private to the driver core portions of the device structure. 
* @@ -99,6 +112,7 @@ struct driver_private { * @async_driver - pointer to device driver awaiting probe via async_probe * @device - pointer back to the struct device that this structure is * associated with. + * @driver_type - The type of the bound Rust driver. * @dead - This device is currently either in the process of or has been * removed from the system. Any asynchronous events scheduled for this * device should exit without taking any action. @@ -115,6 +129,9 @@ struct device_private { const struct device_driver *async_driver; char *deferred_probe_reason; struct device *device; +#ifdef CONFIG_RUST + struct driver_type driver_type; +#endif u8 dead:1; }; #define to_device_private_parent(obj) \ @@ -137,6 +154,7 @@ int hypervisor_init(void); static inline int hypervisor_init(void) { return 0; } #endif int platform_bus_init(void); +int faux_bus_init(void); void cpu_dev_init(void); void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS @@ -179,6 +197,22 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + /* + * Majority (all?) read accesses to dev->driver happens either + * while holding device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read. + * However when reading device's uevent file we read driver pointer + * without taking device lock (so we do not block there for + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ + // FIXME - this cast should not be needed "soon" + WRITE_ONCE(dev->driver, (struct device_driver *)drv); +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); @@ -230,9 +264,18 @@ void device_links_driver_cleanup(struct device *dev); void device_links_no_driver(struct device *dev); bool device_links_busy(struct device *dev); void device_links_unbind_consumers(struct device *dev); +bool device_link_flag_is_sync_state_only(u32 flags); void fw_devlink_drivers_done(void); void fw_devlink_probing_done(void); +#define dev_for_each_link_to_supplier(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.suppliers, c_node, \ + device_links_read_lock_held()) + +#define dev_for_each_link_to_consumer(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.consumers, s_node, \ + device_links_read_lock_held()) + /* device pm support */ void device_pm_move_to_tail(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 657c93c38b0d..9eb7771706f0 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. 
*/ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; @@ -334,6 +334,19 @@ static struct device *next_device(struct klist_iter *i) return dev; } +static struct device *prev_device(struct klist_iter *i) +{ + struct klist_node *n = klist_prev(i); + struct device *dev = NULL; + struct device_private *dev_prv; + + if (n) { + dev_prv = to_device_private_bus(n); + dev = dev_prv->device; + } + return dev; +} + /** * bus_for_each_dev - device iterator. * @bus: bus type. @@ -354,7 +367,7 @@ static struct device *next_device(struct klist_iter *i) * count in the supplied callback. */ int bus_for_each_dev(const struct bus_type *bus, struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; @@ -402,15 +415,43 @@ struct device *bus_find_device(const struct bus_type *bus, klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); +struct device *bus_find_device_reverse(const struct bus_type *bus, + struct device *start, const void *data, + device_match_t match) +{ + struct subsys_private *sp = bus_to_subsys(bus); + struct klist_iter i; + struct device *dev; + + if (!sp) + return NULL; + + klist_iter_init_node(&sp->klist_devices, &i, + (start ? &start->p->knode_bus : NULL)); + while ((dev = prev_device(&i))) { + if (match(dev, data)) { + get_device(dev); + break; + } + } + klist_iter_exit(&i); + subsys_put(sp); + return dev; +} +EXPORT_SYMBOL_GPL(bus_find_device_reverse); + static struct device_driver *next_driver(struct klist_iter *i) { struct klist_node *n = klist_next(i); @@ -530,8 +571,7 @@ void bus_probe_device(struct device *dev) if (!sp) return; - if (sp->drivers_autoprobe) - device_initial_probe(dev); + device_initial_probe(dev); mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) @@ -1288,7 +1328,7 @@ EXPORT_SYMBOL_GPL(subsys_system_register); * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/<name> root device - * with the name of the subystem. The root device can carry subsystem-wide + * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. 
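A minimal usage sketch (not part of the diff): the bus_find_device_reverse() helper added in the bus.c hunk above walks the bus klist from the tail via prev_device(), so it returns the most recently registered match. The wrapper below and its bus/name arguments are hypothetical; the returned reference must be dropped with put_device().

	#include <linux/device.h>
	#include <linux/device/bus.h>

	/*
	 * Hypothetical helper: return the most recently registered device
	 * on @bus whose name matches @name. Reverse iteration finds the
	 * newest match first; bus_find_device_reverse() takes a reference
	 * that the caller must drop with put_device() when done.
	 */
	static struct device *find_newest_named(const struct bus_type *bus,
						const char *name)
	{
		return bus_find_device_reverse(bus, NULL, name,
					       device_match_name);
	}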
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 7a7609298e18..613410705a47 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/acpi.h> +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/cacheinfo.h> #include <linux/compiler.h> @@ -58,7 +59,7 @@ bool last_level_cache_is_valid(unsigned int cpu) { struct cacheinfo *llc; - if (!cache_leaves(cpu)) + if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu)) return false; llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); @@ -183,6 +184,54 @@ static bool cache_node_is_unified(struct cacheinfo *this_leaf, return of_property_read_bool(np, "cache-unified"); } +static bool match_cache_node(struct device_node *cpu, + const struct device_node *cache_node) +{ + struct device_node *prev, *cache = of_find_next_cache_node(cpu); + + while (cache) { + if (cache == cache_node) { + of_node_put(cache); + return true; + } + + prev = cache; + cache = of_find_next_cache_node(cache); + of_node_put(prev); + } + + return false; +} + +#ifndef arch_compact_of_hwid +#define arch_compact_of_hwid(_x) (_x) +#endif + +static void cache_of_set_id(struct cacheinfo *this_leaf, + struct device_node *cache_node) +{ + struct device_node *cpu; + u32 min_id = ~0; + + for_each_of_cpu_node(cpu) { + u64 id = of_get_cpu_hwid(cpu, 0); + + id = arch_compact_of_hwid(id); + if (FIELD_GET(GENMASK_ULL(63, 32), id)) { + of_node_put(cpu); + return; + } + + if (match_cache_node(cpu, cache_node)) + min_id = min(min_id, id); + } + + if (min_id != ~0) { + this_leaf->id = min_id; + this_leaf->attributes |= CACHE_ID; + } +} + static void cache_of_set_props(struct cacheinfo *this_leaf, struct device_node *np) { @@ -198,6 +247,7 @@ static void cache_of_set_props(struct cacheinfo *this_leaf, cache_get_line_size(this_leaf, np); cache_nr_sets(this_leaf, np); cache_associativity(this_leaf); + cache_of_set_id(this_leaf, np); } static int cache_setup_of_node(unsigned int cpu) @@ -254,11 +304,11 @@ static int of_count_cache_leaves(struct device_node *np) { unsigned int leaves = 0; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ++leaves; - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ++leaves; - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ++leaves; if (!leaves) { @@ -367,9 +417,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); for_each_online_cpu(i) { - struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - - if (i == cpu || !sib_cpu_ci->info_list) + if (i == cpu || !per_cpu_cacheinfo(i)) continue;/* skip if itself or no cacheinfo */ for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) { sib_leaf = per_cpu_cacheinfo_idx(i, sib_index); @@ -409,10 +457,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = per_cpu_cacheinfo_idx(cpu, index); for_each_cpu(sibling, &this_leaf->shared_cpu_map) { - struct cpu_cacheinfo *sib_cpu_ci = - get_cpu_cacheinfo(sibling); - - if (sibling == cpu || !sib_cpu_ci->info_list) + if (sibling == cpu || !per_cpu_cacheinfo(sibling)) continue;/* skip if itself or no cacheinfo */ for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) { @@ -463,11 +508,9 @@ int __weak populate_cache_leaves(unsigned int cpu) return -ENOENT; } -static inline -int 
allocate_cache_info(int cpu) +static inline int allocate_cache_info(int cpu) { - per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), - sizeof(struct cacheinfo), GFP_ATOMIC); + per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC); if (!per_cpu_cacheinfo(cpu)) { cache_leaves(cpu) = 0; return -ENOMEM; @@ -539,7 +582,11 @@ static inline int init_level_allocate_ci(unsigned int cpu) */ ci_cacheinfo(cpu)->early_ci_levels = false; - if (cache_leaves(cpu) <= early_leaves) + /* + * Some architectures (e.g., x86) do not use early initialization. + * Allocate memory now in such case. + */ + if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu)) return 0; kfree(per_cpu_cacheinfo(cpu)); diff --git a/drivers/base/class.c b/drivers/base/class.c index cb5359235c70..2526c57d924e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -323,8 +323,12 @@ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; - if (!sp) + memset(iter, 0, sizeof(*iter)); + if (!sp) { + pr_crit("%s: class %p was not registered yet\n", + __func__, class); return; + } if (start) start_knode = &start->p->knode_class; @@ -351,6 +355,9 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter) struct klist_node *knode; struct device *dev; + if (!iter->sp) + return NULL; + while (1) { knode = klist_next(&iter->ki); if (!knode) @@ -395,7 +402,7 @@ EXPORT_SYMBOL_GPL(class_dev_iter_exit); * code. There's no locking restriction. */ int class_for_each_device(const struct class *class, const struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; @@ -405,7 +412,7 @@ int class_for_each_device(const struct class *class, const struct device *start, if (!class) return -EINVAL; if (!sp) { - WARN(1, "%s called for class '%s' before it was initialized", + WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return -EINVAL; } @@ -453,7 +460,7 @@ struct device *class_find_device(const struct class *class, const struct device if (!class) return NULL; if (!sp) { - WARN(1, "%s called for class '%s' before it was initialized", + WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return NULL; } @@ -594,30 +601,10 @@ EXPORT_SYMBOL_GPL(class_compat_unregister); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link should be created */ -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +int class_compat_create_link(struct class_compat *cls, struct device *dev) { - int error; - - error = sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); - if (error) - return error; - - /* - * Optionally add a "device" link (typically to the parent), as a - * class device would have one and we want to provide as much - * backwards compatibility as possible. 
- */ - if (device_link) { - error = sysfs_create_link(&dev->kobj, &device_link->kobj, - "device"); - if (error) - sysfs_remove_link(cls->kobj, dev_name(dev)); - } - - return error; + return sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_create_link); @@ -626,14 +613,9 @@ EXPORT_SYMBOL_GPL(class_compat_create_link); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link was previously - * created */ -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +void class_compat_remove_link(struct class_compat *cls, struct device *dev) { - if (device_link) - sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); diff --git a/drivers/base/component.c b/drivers/base/component.c index 741497324d78..024ad9471b8a 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -87,17 +87,17 @@ static int component_devices_show(struct seq_file *s, void *data) size_t i; mutex_lock(&component_mutex); - seq_printf(s, "%-40s %20s\n", "aggregate_device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); - seq_printf(s, "%-40s %20s\n\n", + seq_printf(s, "%-50s %20s\n", "aggregate_device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); - seq_printf(s, "%-40s %20s\n", "device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n", "device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; - seq_printf(s, "%-40s %20s\n", + seq_printf(s, "%-50s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? 
"bound" : "not bound") : "not registered"); } @@ -569,10 +569,28 @@ void component_master_del(struct device *parent, } EXPORT_SYMBOL_GPL(component_master_del); +bool component_master_is_bound(struct device *parent, + const struct component_master_ops *ops) +{ + struct aggregate_device *adev; + + guard(mutex)(&component_mutex); + adev = __aggregate_find(parent, ops); + if (!adev) + return 0; + + return adev->bound; +} +EXPORT_SYMBOL_GPL(component_master_is_bound); + static void component_unbind(struct component *component, struct aggregate_device *adev, void *data) { - WARN_ON(!component->bound); + if (WARN_ON(!component->bound)) + return; + + dev_dbg(adev->parent, "unbinding %s component %p (ops %ps)\n", + dev_name(component->dev), component, component->ops); if (component->ops && component->ops->unbind) component->ops->unbind(component->dev, adev->parent, data); diff --git a/drivers/base/core.c b/drivers/base/core.c index 529af59e25ff..40de2f51a1b1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -287,7 +287,7 @@ static bool device_is_ancestor(struct device *dev, struct device *target) #define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ DL_FLAG_CYCLE | \ DL_FLAG_MANAGED) -static inline bool device_link_flag_is_sync_state_only(u32 flags) +bool device_link_flag_is_sync_state_only(u32 flags) { return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } @@ -460,9 +460,9 @@ static ssize_t auto_remove_on_show(struct device *dev, struct device_link *link = to_devlink(dev); const char *output; - if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + if (device_link_test(link, DL_FLAG_AUTOREMOVE_SUPPLIER)) output = "supplier unbind"; - else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) + else if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) output = "consumer unbind"; else output = "never"; @@ -476,7 +476,7 @@ static ssize_t runtime_pm_show(struct device *dev, { struct device_link *link = to_devlink(dev); - return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); + return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); @@ -485,8 +485,7 @@ static ssize_t sync_state_only_show(struct device *dev, { struct device_link *link = to_devlink(dev); - return sysfs_emit(buf, "%d\n", - !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); @@ -552,7 +551,7 @@ void device_link_wait_removal(void) } EXPORT_SYMBOL_GPL(device_link_wait_removal); -static struct class devlink_class = { +static const struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, .dev_release = devlink_dev_release, @@ -792,12 +791,12 @@ struct device_link *device_link_add(struct device *consumer, if (link->consumer != consumer) continue; - if (link->flags & DL_FLAG_INFERRED && + if (device_link_test(link, DL_FLAG_INFERRED) && !(flags & DL_FLAG_INFERRED)) link->flags &= ~DL_FLAG_INFERRED; if (flags & DL_FLAG_PM_RUNTIME) { - if (!(link->flags & DL_FLAG_PM_RUNTIME)) { + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } @@ -807,8 +806,8 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); - if (link->flags & DL_FLAG_SYNC_STATE_ONLY && - !(link->flags & DL_FLAG_STATELESS)) { + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && + !device_link_test(link, DL_FLAG_STATELESS)) { link->flags |= 
DL_FLAG_STATELESS; goto reorder; } else { @@ -823,7 +822,7 @@ struct device_link *device_link_add(struct device *consumer, * update the existing link to stay around longer. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { - if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { + if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } @@ -831,12 +830,12 @@ struct device_link *device_link_add(struct device *consumer, link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } - if (!(link->flags & DL_FLAG_MANAGED)) { + if (!device_link_test(link, DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } - if (link->flags & DL_FLAG_SYNC_STATE_ONLY && + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; @@ -940,7 +939,7 @@ static void __device_link_del(struct kref *kref) static void device_link_put_kref(struct device_link *link) { - if (link->flags & DL_FLAG_STATELESS) + if (device_link_test(link, DL_FLAG_STATELESS)) kref_put(&link->kref, __device_link_del); else if (!device_is_registered(link->consumer)) __device_link_del(&link->kref); @@ -1004,7 +1003,7 @@ static void device_links_missing_supplier(struct device *dev) if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { - WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + WARN_ON(!device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } @@ -1072,14 +1071,14 @@ int device_links_check_suppliers(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && - !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { + !device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) { if (dev_is_best_effort(dev) && - link->flags & DL_FLAG_INFERRED && + device_link_test(link, DL_FLAG_INFERRED) && !link->supplier->can_match) { ret = -EAGAIN; continue; @@ -1128,7 +1127,7 @@ static void __device_links_queue_sync_state(struct device *dev, return; list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; @@ -1268,7 +1267,7 @@ void device_links_force_bind(struct device *dev) device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE) { @@ -1329,7 +1328,7 @@ void device_links_driver_bound(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; /* @@ -1345,7 +1344,7 @@ void device_links_driver_bound(struct device *dev) WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); - if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) + if (device_link_test(link, DL_FLAG_AUTOPROBE_CONSUMER)) driver_deferred_probe_add(link->consumer); } @@ -1357,11 +1356,11 @@ void device_links_driver_bound(struct device *dev) list_for_each_entry_safe(link, ln, 
&dev->links.suppliers, c_node) { struct device *supplier; - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; supplier = link->supplier; - if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's @@ -1369,7 +1368,7 @@ void device_links_driver_bound(struct device *dev) */ device_link_drop_managed(link); } else if (dev_is_best_effort(dev) && - link->flags & DL_FLAG_INFERRED && + device_link_test(link, DL_FLAG_INFERRED) && link->status != DL_STATE_CONSUMER_PROBE && !link->supplier->can_match) { /* @@ -1421,10 +1420,10 @@ static void __device_links_no_driver(struct device *dev) struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; - if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { + if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) { device_link_drop_managed(link); continue; } @@ -1436,7 +1435,7 @@ static void __device_links_no_driver(struct device *dev) if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { - WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + WARN_ON(!device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } @@ -1461,7 +1460,7 @@ void device_links_no_driver(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; /* @@ -1498,10 +1497,10 @@ void device_links_driver_cleanup(struct device *dev) device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; - WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); + WARN_ON(device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* @@ -1510,7 +1509,7 @@ void device_links_driver_cleanup(struct device *dev) * has moved to DL_STATE_SUPPLIER_UNBIND. 
*/ if (link->status == DL_STATE_SUPPLIER_UNBIND && - link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + device_link_test(link, DL_FLAG_AUTOREMOVE_SUPPLIER)) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); @@ -1544,7 +1543,7 @@ bool device_links_busy(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE @@ -1586,8 +1585,8 @@ void device_links_unbind_consumers(struct device *dev) list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; - if (!(link->flags & DL_FLAG_MANAGED) || - link->flags & DL_FLAG_SYNC_STATE_ONLY) + if (!device_link_test(link, DL_FLAG_MANAGED) || + device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) continue; status = link->status; @@ -1743,7 +1742,7 @@ static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) static void fw_devlink_relax_link(struct device_link *link) { - if (!(link->flags & DL_FLAG_INFERRED)) + if (!device_link_test(link, DL_FLAG_INFERRED)) return; if (device_link_flag_is_sync_state_only(link->flags)) @@ -1779,13 +1778,13 @@ static int fw_devlink_dev_sync_state(struct device *dev, void *data) struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; - if (!(link->flags & DL_FLAG_MANAGED) || + if (!device_link_test(link, DL_FLAG_MANAGED) || link->status == DL_STATE_ACTIVE || sup->state_synced || !dev_has_sync_state(sup)) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { - dev_warn(sup, "sync_state() pending due to %s\n", + dev_info(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } @@ -1881,8 +1880,6 @@ static void fw_devlink_unblock_consumers(struct device *dev) device_links_write_unlock(); } -#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) - static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) { struct device *dev; @@ -1971,7 +1968,7 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn /** * __fw_devlink_relax_cycles - Relax and mark dependency cycles. - * @con: Potential consumer device. + * @con_handle: Potential consumer device fwnode. * @sup_handle: Potential supplier's fwnode. * * Needs to be called with fwnode_lock and device link lock held. @@ -1989,10 +1986,10 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn * * Return true if one or more cycles were found. Otherwise, return false. */ -static bool __fw_devlink_relax_cycles(struct device *con, +static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, struct fwnode_handle *sup_handle) { - struct device *sup_dev = NULL, *par_dev = NULL; + struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL; struct fwnode_link *link; struct device_link *dev_link; bool ret = false; @@ -2009,22 +2006,22 @@ static bool __fw_devlink_relax_cycles(struct device *con, sup_handle->flags |= FWNODE_FLAG_VISITED; - sup_dev = get_dev_from_fwnode(sup_handle); - /* Termination condition. */ - if (sup_dev == con) { + if (sup_handle == con_handle) { pr_debug("----- cycle: start -----\n"); ret = true; goto out; } + sup_dev = get_dev_from_fwnode(sup_handle); + con_dev = get_dev_from_fwnode(con_handle); /* * If sup_dev is bound to a driver and @con hasn't started binding to a * driver, sup_dev can't be a consumer of @con. So, no need to check * further. 
*/ if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND && - con->links.status == DL_DEV_NO_DRIVER) { + con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) { ret = false; goto out; } @@ -2033,7 +2030,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, if (link->flags & FWLINK_FLAG_IGNORE) continue; - if (__fw_devlink_relax_cycles(con, link->supplier)) { + if (__fw_devlink_relax_cycles(con_handle, link->supplier)) { __fwnode_link_cycle(link); ret = true; } @@ -2048,7 +2045,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, else par_dev = fwnode_get_next_parent_dev(sup_handle); - if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) { + if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) { pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, par_dev->fwnode); ret = true; @@ -2063,10 +2060,10 @@ static bool __fw_devlink_relax_cycles(struct device *con, * such due to a cycle. */ if (device_link_flag_is_sync_state_only(dev_link->flags) && - !(dev_link->flags & DL_FLAG_CYCLE)) + !device_link_test(dev_link, DL_FLAG_CYCLE)) continue; - if (__fw_devlink_relax_cycles(con, + if (__fw_devlink_relax_cycles(con_handle, dev_link->supplier->fwnode)) { pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, dev_link->supplier->fwnode); @@ -2079,6 +2076,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); + put_device(con_dev); put_device(par_dev); return ret; } @@ -2114,11 +2112,6 @@ static int fw_devlink_create_devlink(struct device *con, if (link->flags & FWLINK_FLAG_IGNORE) return 0; - if (con->fwnode == link->consumer) - flags = fw_devlink_get_flags(link->flags); - else - flags = FW_DEVLINK_FLAGS_PERMISSIVE; - /* * In some cases, a device P might also be a supplier to its child node * C. However, this would defer the probe of C until the probe of P @@ -2139,25 +2132,23 @@ static int fw_devlink_create_devlink(struct device *con, return -EINVAL; /* - * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So, one might expect that cycle detection isn't necessary for them. - * However, if the device link was marked as SYNC_STATE_ONLY because - * it's part of a cycle, then we still need to do cycle detection. This - * is because the consumer and supplier might be part of multiple cycles - * and we need to detect all those cycles. + * Don't try to optimize by not calling the cycle detection logic under + * certain conditions. There's always some corner case that won't get + * detected. 
*/ - if (!device_link_flag_is_sync_state_only(flags) || - flags & DL_FLAG_CYCLE) { - device_links_write_lock(); - if (__fw_devlink_relax_cycles(con, sup_handle)) { - __fwnode_link_cycle(link); - flags = fw_devlink_get_flags(link->flags); - pr_debug("----- cycle: end -----\n"); - dev_info(con, "Fixed dependency cycle(s) with %pfwf\n", - sup_handle); - } - device_links_write_unlock(); + device_links_write_lock(); + if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) { + __fwnode_link_cycle(link); + pr_debug("----- cycle: end -----\n"); + pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n", + link->consumer, sup_handle); } + device_links_write_unlock(); + + if (con->fwnode == link->consumer) + flags = fw_devlink_get_flags(link->flags); + else + flags = FW_DEVLINK_FLAGS_PERMISSIVE; if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) sup_dev = fwnode_get_next_parent_dev(sup_handle); @@ -2180,8 +2171,8 @@ } if (con != sup_dev && !device_link_add(con, sup_dev, flags)) { - dev_err(con, "Failed to create device link (0x%x) with %s\n", - flags, dev_name(sup_dev)); + dev_err(con, "Failed to create device link (0x%x) with supplier %s for %pfwf\n", + flags, dev_name(sup_dev), link->consumer); ret = -EINVAL; } @@ -2630,6 +2621,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; } +/* + * Try filling the "DRIVER=<name>" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe, at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyway). Unbinding is more + * risky, as the driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that the driver structure is not going to disappear from under us + * we take the bus's drivers klist lock. The assumption is that only a + * registered driver can be bound to a device, and that bus code takes + * the same lock to unregister a driver.
+ */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2661,8 +2681,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2732,11 +2752,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; @@ -3706,7 +3723,7 @@ done: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); @@ -3977,8 +3994,8 @@ const char *device_get_devnode(const struct device *dev, /** * device_for_each_child - device child iterator. * @parent: parent struct device. - * @fn: function to be called for each device. * @data: data for the callback. + * @fn: function to be called for each device. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. @@ -3987,7 +4004,7 @@ const char *device_get_devnode(const struct device *dev, * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; @@ -4007,8 +4024,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. - * @fn: function to be called for each device. * @data: data for the callback. + * @fn: function to be called for each device. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. @@ -4017,7 +4034,7 @@ EXPORT_SYMBOL_GPL(device_for_each_child); * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; @@ -4038,8 +4055,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse); * device_for_each_child_reverse_from - device child iterator in reversed order. * @parent: parent struct device. * @from: optional starting point in child list - * @fn: function to be called for each device. * @data: data for the callback. + * @fn: function to be called for each device. * * Iterate over @parent's child devices, starting at @from, and call @fn * for each, passing it @data. 
This helper is identical to @@ -4050,14 +4067,14 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse); * device_for_each_child_reverse_from(); */ int device_for_each_child_reverse_from(struct device *parent, - struct device *from, const void *data, - int (*fn)(struct device *, const void *)) + struct device *from, void *data, + device_iter_t fn) { struct klist_iter i; struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init_node(&parent->p->klist_children, &i, @@ -4072,8 +4089,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); /** * device_find_child - device iterator for locating a particular device. * @parent: parent struct device - * @match: Callback function to check device * @data: Data to pass to match function + * @match: Callback function to check device * * This is similar to the device_for_each_child() function above, but it * returns a reference to a device that is 'found' for later use, as @@ -4086,8 +4103,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); * * NOTE: you will need to drop the reference with put_device() after use. */ -struct device *device_find_child(struct device *parent, void *data, - int (*match)(struct device *dev, void *data)) +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match) { struct klist_iter i; struct device *child; @@ -4096,62 +4113,17 @@ struct device *device_find_child(struct device *parent, void *data, return NULL; klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (match(child, data) && get_device(child)) + while ((child = next_device(&i))) { + if (match(child, data)) { + get_device(child); break; + } + } klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); -/** - * device_find_child_by_name - device iterator for locating a child device. - * @parent: parent struct device - * @name: name of the child device - * - * This is similar to the device_find_child() function above, but it - * returns a reference to a device that has the name @name. - * - * NOTE: you will need to drop the reference with put_device() after use. - */ -struct device *device_find_child_by_name(struct device *parent, - const char *name) -{ - struct klist_iter i; - struct device *child; - - if (!parent) - return NULL; - - klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (sysfs_streq(dev_name(child), name) && get_device(child)) - break; - klist_iter_exit(&i); - return child; -} -EXPORT_SYMBOL_GPL(device_find_child_by_name); - -static int match_any(struct device *dev, void *unused) -{ - return 1; -} - -/** - * device_find_any_child - device iterator for locating a child device, if any. - * @parent: parent struct device - * - * This is similar to the device_find_child() function above, but it - * returns a reference to a child device, if any. - * - * NOTE: you will need to drop the reference with put_device() after use. 
- */ -struct device *device_find_any_child(struct device *parent) -{ - return device_find_child(parent, NULL, match_any); -} -EXPORT_SYMBOL_GPL(device_find_any_child); - int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); @@ -4166,7 +4138,7 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; - device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + device_link_wq = alloc_workqueue("device_link_wq", WQ_PERCPU, 0); if (!device_link_wq) goto wq_err; @@ -5223,6 +5195,67 @@ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** + * device_remove_of_node - Remove an of_node from a device + * @dev: device whose device tree node is being removed + */ +void device_remove_of_node(struct device *dev) +{ + dev = get_device(dev); + if (!dev) + return; + + if (!dev->of_node) + goto end; + + if (dev->fwnode == of_fwnode_handle(dev->of_node)) + dev->fwnode = NULL; + + of_node_put(dev->of_node); + dev->of_node = NULL; + +end: + put_device(dev); +} +EXPORT_SYMBOL_GPL(device_remove_of_node); + +/** + * device_add_of_node - Add an of_node to an existing device + * @dev: device whose device tree node is being added + * @of_node: of_node to add + * + * Return: 0 on success or error code on failure. + */ +int device_add_of_node(struct device *dev, struct device_node *of_node) +{ + int ret; + + if (!of_node) + return -EINVAL; + + dev = get_device(dev); + if (!dev) + return -EINVAL; + + if (dev->of_node) { + dev_err(dev, "Cannot replace node %pOF with %pOF\n", + dev->of_node, of_node); + ret = -EBUSY; + goto end; + } + + dev->of_node = of_node_get(of_node); + + if (!dev->fwnode) + dev->fwnode = of_fwnode_handle(of_node); + + ret = 0; +end: + put_device(dev); + return ret; +} +EXPORT_SYMBOL_GPL(device_add_of_node); + +/** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused @@ -5245,21 +5278,52 @@ void device_set_node(struct device *dev, struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(device_set_node); +/** + * get_dev_from_fwnode - Obtain a reference count of the struct device the + * struct fwnode_handle is associated with. + * @fwnode: The pointer to the struct fwnode_handle to obtain the struct device + * reference count of. + * + * This function obtains a reference count of the device the device pointer + * embedded in the struct fwnode_handle points to. + * + * Note that the struct device pointer embedded in struct fwnode_handle does + * *not* have a reference count of the struct device itself. + * + * Hence, it is a UAF (and thus a bug) to call this function if the caller can't + * guarantee that the last reference count of the corresponding struct device is + * not dropped concurrently. + * + * This is possible since struct fwnode_handle has its own reference count and + * hence can out-live the struct device it is associated with. 
+ */ +struct device *get_dev_from_fwnode(struct fwnode_handle *fwnode) +{ + return get_device((fwnode)->dev); +} +EXPORT_SYMBOL_GPL(get_dev_from_fwnode); + int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); +int device_match_type(struct device *dev, const void *type) +{ + return dev->type == type; +} +EXPORT_SYMBOL_GPL(device_match_type); + int device_match_of_node(struct device *dev, const void *np) { - return dev->of_node == np; + return np && dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { - return dev_fwnode(dev) == fwnode; + return fwnode && dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); @@ -5271,13 +5335,13 @@ EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { - return ACPI_COMPANION(dev) == adev; + return adev && ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_acpi_handle(struct device *dev, const void *handle) { - return ACPI_HANDLE(dev) == handle; + return handle && ACPI_HANDLE(dev) == handle; } EXPORT_SYMBOL(device_match_acpi_handle); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fdaa24bb641a..c6c57b6f61c6 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -300,13 +300,30 @@ static ssize_t print_cpus_isolated(struct device *dev, } static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); +static ssize_t housekeeping_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE); + + if (housekeeping_enabled(HK_TYPE_KERNEL_NOISE)) + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(hk_mask)); + return sysfs_emit(buf, "\n"); +} +static DEVICE_ATTR_RO(housekeeping); + #ifdef CONFIG_NO_HZ_FULL -static ssize_t print_cpus_nohz_full(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nohz_full_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); + if (cpumask_available(tick_nohz_full_mask)) + return sysfs_emit(buf, "%*pbl\n", + cpumask_pr_args(tick_nohz_full_mask)); + return sysfs_emit(buf, "\n"); } -static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); +static DEVICE_ATTR_RO(nohz_full); #endif #ifdef CONFIG_CRASH_HOTPLUG @@ -325,7 +342,7 @@ static void cpu_device_release(struct device *dev) * This is an empty function to prevent the driver core from spitting a * warning at us. Yes, I know this is directly opposite of what the * documentation for the driver core and kobjects say, and the author - * of this code has already been publically ridiculed for doing + * of this code has already been publicly ridiculed for doing * something as foolish as this. However, at this point in time, it is * the only way to handle the issue of statically allocated cpu * devices. 
The different architectures will have their cpu device @@ -505,6 +522,7 @@ static struct attribute *cpu_root_attrs[] = { &dev_attr_offline.attr, &dev_attr_enabled.attr, &dev_attr_isolated.attr, + &dev_attr_housekeeping.attr, #ifdef CONFIG_NO_HZ_FULL &dev_attr_nohz_full.attr, #endif @@ -599,6 +617,11 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(ghostwrite); +CPU_SHOW_VULN_FALLBACK(old_microcode); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); +CPU_SHOW_VULN_FALLBACK(vmscape); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -614,6 +637,11 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); +static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); +static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -630,6 +658,11 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, + &dev_attr_old_microcode.attr, + &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, + &dev_attr_vmscape.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885..349f31bedfa1 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -25,6 +25,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/async.h> +#include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/pinctrl/devinfo.h> #include <linux/slab.h> @@ -192,7 +193,7 @@ void driver_deferred_probe_trigger(void) * Kick the re-probe thread. It may already be scheduled, but it is * safe to kick it again. 
*/ - queue_work(system_unbound_wq, &deferred_probe_work); + queue_work(system_dfl_wq, &deferred_probe_work); } /** @@ -550,8 +551,9 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); + dev_pm_domain_detach(dev, dev->power.detach_power_off); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); pm_runtime_reinit(dev); @@ -629,8 +631,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) } re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1015,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { @@ -1076,7 +1077,15 @@ EXPORT_SYMBOL_GPL(device_attach); void device_initial_probe(struct device *dev) { - __device_attach(dev, true); + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (!sp) + return; + + if (sp->drivers_autoprobe) + __device_attach(dev, true); + + subsys_put(sp); } /* diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index c795edad1b96..55bdc7f5e59d 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -23,50 +23,46 @@ struct devcd_entry { void *data; size_t datalen; /* - * Here, mutex is required to serialize the calls to del_wk work between - * user/kernel space which happens when devcd is added with device_add() - * and that sends uevent to user space. User space reads the uevents, - * and calls to devcd_data_write() which try to modify the work which is - * not even initialized/queued from devcoredump. * + * There are two races for which the mutex is required. * + * The first race is between device creation and userspace writing to + * schedule immediate destruction. * + * This race is handled by arming the timer before device creation, but + * when device creation fails the timer still exists. * - * cpu0(X) cpu1(Y) * + * To solve this, hold the mutex during device_add(), and set + * init_completed on success before releasing the mutex. * - * dev_coredump() uevent sent to user space - * device_add() ======================> user space process Y reads the - * uevents writes to devcd fd - * which results into writes to + * That way the timer will never fire until device_add() is called, + * it will do nothing if init_completed is not set. The timer is also + * cancelled in that case. * - * devcd_data_write() - * mod_delayed_work() - * try_to_grab_pending() - * del_timer() - * debug_assert_init() - * INIT_DELAYED_WORK() - * schedule_delayed_work() - * - * - * Also, mutex alone would not be enough to avoid scheduling of - * del_wk work after it get flush from a call to devcd_free() - * mentioned as below. - * - * disabled_store() - * devcd_free() - * mutex_lock() devcd_data_write() - * flush_delayed_work() - * mutex_unlock() - * mutex_lock() - * mod_delayed_work() - * mutex_unlock() - * So, delete_work flag is required. + * The second race involves multiple parallel invocations of devcd_free(), + * add a deleted flag so only one can call the destructor.
*/ struct mutex mutex; - bool delete_work; + bool init_completed, deleted; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); void (*free)(void *data); + /* + * If nothing interferes and device_add() returns success, + * del_wk will destroy the device after the timer fires. + * + * Multiple userspace processes can interfere in the working of the timer: + * - Writing to the coredump will reschedule the timer to run immediately, + * if still armed. + * + * This is handled by using "if (cancel_delayed_work()) { + * schedule_delayed_work() }", to prevent re-arming after having + * been previously fired. + * - Writing to /sys/class/devcoredump/disabled will destroy the + * coredump synchronously. + * This is handled by using disable_delayed_work_sync(), and then + * checking if the deleted flag is set with &devcd->mutex held. + */ struct delayed_work del_wk; struct device *failing_dev; }; @@ -95,18 +91,31 @@ static void devcd_dev_release(struct device *dev) kfree(devcd); } +static void __devcd_del(struct devcd_entry *devcd) +{ + devcd->deleted = true; + device_del(&devcd->devcd_dev); + put_device(&devcd->devcd_dev); +} + static void devcd_del(struct work_struct *wk) { struct devcd_entry *devcd; + bool init_completed; devcd = container_of(wk, struct devcd_entry, del_wk.work); - device_del(&devcd->devcd_dev); - put_device(&devcd->devcd_dev); + /* devcd->mutex serializes against dev_coredumpm_timeout */ + mutex_lock(&devcd->mutex); + init_completed = devcd->init_completed; + mutex_unlock(&devcd->mutex); + + if (init_completed) + __devcd_del(devcd); } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -116,30 +125,26 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, } static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mutex_lock(&devcd->mutex); - if (!devcd->delete_work) { - devcd->delete_work = true; - mod_delayed_work(system_wq, &devcd->del_wk, 0); - } - mutex_unlock(&devcd->mutex); + /* + * Although it's tempting to use mod_delayed_work() here, + * that will cause a reschedule if the timer already fired. + */ + if (cancel_delayed_work(&devcd->del_wk)) + schedule_delayed_work(&devcd->del_wk, 0); return count; } -static struct bin_attribute devcd_attr_data = { - .attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, }, - .size = 0, - .read = devcd_data_read, - .write = devcd_data_write, -}; +static const struct bin_attribute devcd_attr_data = + __BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0); -static struct bin_attribute *devcd_dev_bin_attrs[] = { +static const struct bin_attribute *const devcd_dev_bin_attrs[] = { &devcd_attr_data, NULL, }; @@ -155,11 +160,21 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); + /* + * To prevent a race with devcd_data_write(), disable work and + * complete manually instead. + * + * We cannot rely on the return value of + * disable_delayed_work_sync() here, because it might be in the + * middle of a cancel_delayed_work + schedule_delayed_work pair.
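+ * (A hypothetical interleaving: devcd_data_write() has done the + * cancel_delayed_work() but not yet the re-arming schedule_delayed_work(), + * so the work is momentarily not pending and the return value alone + * cannot say whether destruction was requested.)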
+ * + * devcd->mutex here guards against multiple parallel invocations + * of devcd_free(). + */ + disable_delayed_work_sync(&devcd->del_wk); mutex_lock(&devcd->mutex); - if (!devcd->delete_work) - devcd->delete_work = true; - - flush_delayed_work(&devcd->del_wk); + if (!devcd->deleted) + __devcd_del(devcd); mutex_unlock(&devcd->mutex); return 0; } @@ -183,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri * put_device() <- last reference * error = fn(dev, data) devcd_dev_release() * devcd_free(dev, data) kfree(devcd) - * mutex_lock(&devcd->mutex); * * - * In the above diagram, It looks like disabled_store() would be racing with parallely - * running devcd_del() and result in memory abort while acquiring devcd->mutex which - * is called after kfree of devcd memory after dropping its last reference with + * In the above diagram, it looks like disabled_store() would be racing with concurrently + * running devcd_del() and result in memory abort after dropping its last reference with * put_device(). However, this will not happen as fn(dev, data) runs * with its own reference to device via klist_node so it is not its last reference. * So, the above situation would not occur. @@ -285,6 +298,8 @@ static void devcd_free_sgtable(void *data) * @offset: start copy from @offset@ bytes from the head of the data * in the given scatterlist * @data_len: the length of the data in the sg_table + * + * Returns: the number of bytes copied */ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, size_t buf_len, void *data, @@ -376,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); - devcd->delete_work = false; + devcd->deleted = false; mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); @@ -385,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; - mutex_lock(&devcd->mutex); dev_set_uevent_suppress(&devcd->devcd_dev, true); + + /* devcd->mutex prevents devcd_del() from completing until init finishes */ + mutex_lock(&devcd->mutex); + devcd->init_completed = false; + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, timeout); + if (device_add(&devcd->devcd_dev)) goto put_device; @@ -403,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, timeout); + + /* + * Safe to run devcd_del() now that we are done with devcd_dev. + * Alternatively we could have taken a ref on devcd_dev before + * dropping the lock. + */ + devcd->init_completed = true; mutex_unlock(&devcd->mutex); return; put_device: - put_device(&devcd->devcd_dev); mutex_unlock(&devcd->mutex); + cancel_delayed_work_sync(&devcd->del_wk); + put_device(&devcd->devcd_dev); + put_module: module_put(owner); free: diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 2152eec0c135..f54db6d138ab 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -576,7 +576,10 @@ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) } EXPORT_SYMBOL_GPL(devres_open_group); -/* Find devres group with ID @id. If @id is NULL, look for the latest. */ +/* + * Find devres group with ID @id.
If @id is NULL, look for the latest open + * group. + */ static struct devres_group *find_group(struct device *dev, void *id) { struct devres_node *node; @@ -687,6 +690,13 @@ int devres_release_group(struct device *dev, void *id) spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); + } else if (list_empty(&dev->devres_head)) { + /* + * dev is probably dying via devres_release_all(): groups + * have already been removed and are in the process of + * being released - don't touch and don't warn. + */ + spin_unlock_irqrestore(&dev->devres_lock, flags); } else { WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); @@ -749,26 +759,50 @@ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, co } EXPORT_SYMBOL_GPL(__devm_add_action); +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data) +{ + struct action_devres devres = { + .data = data, + .action = action, + }; + + return devres_find(dev, devm_action_release, devm_action_match, &devres); +} +EXPORT_SYMBOL_GPL(devm_is_action_added); + /** - * devm_remove_action() - removes previously added custom action + * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. + * + * In contrast to devm_remove_action(), this function does not WARN() if no + * matching entry could be found. + * + * This should only be used if the action is contained in an object with + * independent lifetime management, e.g. the Devres Rust abstraction. + * + * Causing the warning from regular driver code most likely indicates an abuse + * of the devres API. + * + * Returns: 0 on success, -ENOENT if no matching entry could be found. */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +int devm_remove_action_nowarn(struct device *dev, + void (*action)(void *), + void *data) { struct action_devres devres = { .data = data, .action = action, }; - WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, - &devres)); + return devres_destroy(dev, devm_action_release, devm_action_match, + &devres); } -EXPORT_SYMBOL_GPL(devm_remove_action); +EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); /** * devm_release_action() - release previously added custom action @@ -953,17 +987,10 @@ EXPORT_SYMBOL_GPL(devm_krealloc); */ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) { - size_t size; - char *buf; - if (!s) return NULL; - size = strlen(s) + 1; - buf = devm_kmalloc(dev, size, gfp); - if (buf) - memcpy(buf, s, size); - return buf; + return devm_kmemdup(dev, s, strlen(s) + 1, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup); @@ -1090,6 +1117,27 @@ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) } EXPORT_SYMBOL_GPL(devm_kmemdup); +/** + * devm_kmemdup_const - conditionally duplicate and manage a region of memory + * + * @dev: Device this memory belongs to + * @src: memory region to duplicate + * @len: memory region length + * @gfp: GFP mask to use + * + * Return: source address if it is in .rodata, or the return value of + * devm_kmemdup() to which the function falls back otherwise.
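+ * + * A minimal (hypothetical) use, "ex_tbl" being an assumed caller-side + * table; the copy is skipped because the source already lives in .rodata: + * + *	static const u32 ex_tbl[] = { 1, 2, 3 }; + * + *	const u32 *tbl = devm_kmemdup_const(dev, ex_tbl, sizeof(ex_tbl), + *					    GFP_KERNEL);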
+ */ +const void * +devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp) +{ + if (is_kernel_rodata((unsigned long)src)) + return src; + + return devm_kmemdup(dev, src, len, gfp); +} +EXPORT_SYMBOL_GPL(devm_kmemdup_const); + struct pages_devres { unsigned long addr; unsigned int order; @@ -1174,13 +1222,6 @@ static void devm_percpu_release(struct device *dev, void *pdata) free_percpu(p); } -static int devm_percpu_match(struct device *dev, void *data, void *p) -{ - struct devres *devr = container_of(data, struct devres, data); - - return *(void **)devr->data == p; -} - /** * __devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for @@ -1216,21 +1257,3 @@ void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, return pcpu; } EXPORT_SYMBOL_GPL(__devm_alloc_percpu); - -/** - * devm_free_percpu - Resource-managed free_percpu - * @dev: Device this memory belongs to - * @pdata: Per-cpu memory to free - * - * Free memory allocated with devm_alloc_percpu(). - */ -void devm_free_percpu(struct device *dev, void __percpu *pdata) -{ - /* - * Use devres_release() to prevent memory leakage as - * devm_free_pages() does. - */ - WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match, - (void *)(__force unsigned long)pdata)); -} -EXPORT_SYMBOL_GPL(devm_free_percpu); diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b848764ef018..194b44075ac7 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -63,22 +63,6 @@ __setup("devtmpfs.mount=", mount_param); static struct vfsmount *mnt; -static struct dentry *public_dev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - struct super_block *s = mnt->mnt_sb; - int err; - - atomic_inc(&s->s_active); - down_write(&s->s_umount); - err = reconfigure_single(s, flags, data); - if (err < 0) { - deactivate_locked_super(s); - return ERR_PTR(err); - } - return dget(s->s_root); -} - static struct file_system_type internal_fs_type = { .name = "devtmpfs", #ifdef CONFIG_TMPFS @@ -86,12 +70,43 @@ static struct file_system_type internal_fs_type = { #else .init_fs_context = ramfs_init_fs_context, #endif - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; +/* Simply take a ref on the existing mount */ +static int devtmpfs_get_tree(struct fs_context *fc) +{ + struct super_block *sb = mnt->mnt_sb; + + atomic_inc(&sb->s_active); + down_write(&sb->s_umount); + fc->root = dget(sb->s_root); + return 0; +} + +/* Ops are filled in during init depending on underlying shmem or ramfs type */ +struct fs_context_operations devtmpfs_context_ops = {}; + +/* Call the underlying initialization and set to our ops */ +static int devtmpfs_init_fs_context(struct fs_context *fc) +{ + int ret; +#ifdef CONFIG_TMPFS + ret = shmem_init_fs_context(fc); +#else + ret = ramfs_init_fs_context(fc); +#endif + if (ret < 0) + return ret; + + fc->ops = &devtmpfs_context_ops; + + return 0; +} + static struct file_system_type dev_fs_type = { .name = "devtmpfs", - .mount = public_dev_mount, + .init_fs_context = devtmpfs_init_fs_context, }; static int devtmpfs_submit_req(struct req *req, const char *tmp) @@ -160,18 +175,17 @@ static int dev_mkdir(const char *name, umode_t mode) { struct dentry *dentry; struct path path; - int err; - dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + dentry = start_creating_path(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); - err = vfs_mkdir(&nop_mnt_idmap, 
d_inode(path.dentry), dentry, mode); - if (!err) + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL); + if (!IS_ERR(dentry)) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; - done_path_create(&path, dentry); - return err; + end_creating_path(&path, dentry); + return PTR_ERR_OR_ZERO(dentry); } static int create_path(const char *nodepath) @@ -208,16 +222,16 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, struct path path; int err; - dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); + dentry = start_creating_path(AT_FDCWD, nodename, &path, 0); if (dentry == ERR_PTR(-ENOENT)) { create_path(nodename); - dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); + dentry = start_creating_path(AT_FDCWD, nodename, &path, 0); } if (IS_ERR(dentry)) return PTR_ERR(dentry); err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, - dev->devt); + dev->devt, NULL); if (!err) { struct iattr newattrs; @@ -232,7 +246,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; } - done_path_create(&path, dentry); + end_creating_path(&path, dentry); return err; } @@ -242,21 +256,16 @@ static int dev_rmdir(const char *name) struct dentry *dentry; int err; - dentry = kern_path_locked(name, &parent); + dentry = start_removing_path(name, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - if (d_inode(dentry)->i_private == &thread) - err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } - dput(dentry); - inode_unlock(d_inode(parent.dentry)); - path_put(&parent); + if (d_inode(dentry)->i_private == &thread) + err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + else + err = -EPERM; + + end_removing_path(&parent, dentry); return err; } @@ -285,7 +294,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -293,13 +302,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -310,44 +319,36 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; + struct inode *inode; int deleted = 0; - int err; + int err = 0; - dentry = kern_path_locked(nodename, &parent); + dentry = start_removing_path(nodename, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - struct kstat stat; - struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = GLOBAL_ROOT_UID; - newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - inode_lock(d_inode(dentry)); 
- notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); - inode_unlock(d_inode(dentry)); - err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), - dentry, NULL); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = GLOBAL_ROOT_UID; + newattrs.ia_gid = GLOBAL_ROOT_GID; + newattrs.ia_mode = inode->i_mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + inode_lock(d_inode(dentry)); + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); + inode_unlock(d_inode(dentry)); + err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + if (!err || err == -ENOENT) + deleted = 1; } - dput(dentry); - inode_unlock(d_inode(parent.dentry)); + end_removing_path(&parent, dentry); - path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; @@ -443,6 +444,31 @@ static int __ref devtmpfsd(void *p) } /* + * Get the underlying (shmem/ramfs) context ops to build ours + */ +static int devtmpfs_configure_context(void) +{ + struct fs_context *fc; + + fc = fs_context_for_reconfigure(mnt->mnt_root, mnt->mnt_sb->s_flags, + MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + /* Set up devtmpfs_context_ops based on underlying type */ + devtmpfs_context_ops.free = fc->ops->free; + devtmpfs_context_ops.dup = fc->ops->dup; + devtmpfs_context_ops.parse_param = fc->ops->parse_param; + devtmpfs_context_ops.parse_monolithic = fc->ops->parse_monolithic; + devtmpfs_context_ops.get_tree = &devtmpfs_get_tree; + devtmpfs_context_ops.reconfigure = fc->ops->reconfigure; + + put_fs_context(fc); + + return 0; +} + +/* * Create devtmpfs instance, driver-core devices will add their device * nodes here. */ @@ -456,6 +482,13 @@ int __init devtmpfs_init(void) pr_err("unable to create devtmpfs %ld\n", PTR_ERR(mnt)); return PTR_ERR(mnt); } + + err = devtmpfs_configure_context(); + if (err) { + pr_err("unable to configure devtmpfs type %d\n", err); + return err; + } + err = register_filesystem(&dev_fs_type); if (err) { pr_err("unable to register devtmpfs type %d\n", err); diff --git a/drivers/base/driver.c b/drivers/base/driver.c index b4eb5b89c4ee..8ab010ddf709 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_set_override); * Iterate over the @drv's list of devices calling @fn for each one. */ int driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct klist_iter i; struct device *dev; @@ -160,9 +160,12 @@ struct device *driver_find_device(const struct device_driver *drv, klist_iter_init_node(&drv->p->klist_devices, &i, (start ? 
&start->p->knode_driver : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); return dev; } diff --git a/drivers/base/faux.c b/drivers/base/faux.c new file mode 100644 index 000000000000..21dd02124231 --- /dev/null +++ b/drivers/base/faux.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/container_of.h> +#include <linux/device/faux.h> +#include "base.h" + +/* + * Internal wrapper structure so we can hold a pointer to the + * faux_device_ops for this device. + */ +struct faux_object { + struct faux_device faux_dev; + const struct faux_device_ops *faux_ops; + const struct attribute_group **groups; +}; +#define to_faux_object(dev) container_of_const(dev, struct faux_object, faux_dev.dev) + +static struct device faux_bus_root = { + .init_name = "faux", +}; + +static int faux_match(struct device *dev, const struct device_driver *drv) +{ + /* Match always succeeds, we only have one driver */ + return 1; +} + +static int faux_probe(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + int ret; + + if (faux_ops && faux_ops->probe) { + ret = faux_ops->probe(faux_dev); + if (ret) + return ret; + } + + /* + * Add groups after the probe succeeds to ensure resources are + * initialized correctly + */ + ret = device_add_groups(dev, faux_obj->groups); + if (ret && faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); + + return ret; +} + +static void faux_remove(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + + device_remove_groups(dev, faux_obj->groups); + + if (faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); +} + +static const struct bus_type faux_bus_type = { + .name = "faux", + .match = faux_match, + .probe = faux_probe, + .remove = faux_remove, +}; + +static struct device_driver faux_driver = { + .name = "faux_driver", + .bus = &faux_bus_type, + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, +}; + +static void faux_device_release(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + + kfree(faux_obj); +} + +/** + * faux_device_create_with_groups - Create and register with the driver + * core a faux device and populate the device with an initial + * set of sysfs attributes. + * @name: The name of the device we are adding, must be unique for + * all faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. 
+ * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * @groups: The set of sysfs attributes that will be created for this + * device when it is registered with the driver core. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device so + * that the caller can do something with it at the proper time in the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. If the + * probe callback (if one is present) does NOT succeed, the creation of the + * device will fail and NULL will be returned. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups) +{ + struct faux_object *faux_obj; + struct faux_device *faux_dev; + struct device *dev; + int ret; + + faux_obj = kzalloc(sizeof(*faux_obj), GFP_KERNEL); + if (!faux_obj) + return NULL; + + /* Save off the callbacks and groups so we can use them in the future */ + faux_obj->faux_ops = faux_ops; + faux_obj->groups = groups; + + /* Initialize the device portion and register it with the driver core */ + faux_dev = &faux_obj->faux_dev; + dev = &faux_dev->dev; + + device_initialize(dev); + dev->release = faux_device_release; + if (parent) + dev->parent = parent; + else + dev->parent = &faux_bus_root; + dev->bus = &faux_bus_type; + dev_set_name(dev, "%s", name); + device_set_pm_not_required(dev); + + ret = device_add(dev); + if (ret) { + pr_err("%s: device_add for faux device '%s' failed with %d\n", + __func__, name, ret); + put_device(dev); + return NULL; + } + + /* + * Verify that we did bind a driver to the device (i.e. probe worked); + * if not, fail the creation, as it is almost impossible for the caller + * to determine whether probe succeeded. + */ + if (!dev->driver) { + dev_dbg(dev, "probe did not succeed, tearing down the device\n"); + faux_device_destroy(faux_dev); + faux_dev = NULL; + } + + return faux_dev; +} +EXPORT_SYMBOL_GPL(faux_device_create_with_groups); + +/** + * faux_device_create - create and register with the driver core a faux device + * @name: The name of the device we are adding, must be unique for all + * faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device so + * that the caller can do something with it at the proper time in the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time.
+ * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops) +{ + return faux_device_create_with_groups(name, parent, faux_ops, NULL); +} +EXPORT_SYMBOL_GPL(faux_device_create); + +/** + * faux_device_destroy - destroy a faux device + * @faux_dev: faux device to destroy + * + * Unregisters and cleans up a device that was created with a call to + * faux_device_create(). + */ +void faux_device_destroy(struct faux_device *faux_dev) +{ + struct device *dev; + + if (!faux_dev) + return; + + dev = &faux_dev->dev; + device_del(dev); + + /* The final put_device() will clean up the memory we allocated for this device. */ + put_device(dev); +} +EXPORT_SYMBOL_GPL(faux_device_destroy); + +int __init faux_bus_init(void) +{ + int ret; + + ret = device_register(&faux_bus_root); + if (ret) { + put_device(&faux_bus_root); + return ret; + } + + ret = bus_register(&faux_bus_type); + if (ret) + goto error_bus; + + ret = driver_register(&faux_driver); + if (ret) + goto error_driver; + + return ret; + +error_driver: + bus_unregister(&faux_bus_type); + +error_bus: + device_unregister(&faux_bus_root); + return ret; +} diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig index a03701674265..15eff8a4b505 100644 --- a/drivers/base/firmware_loader/Kconfig +++ b/drivers/base/firmware_loader/Kconfig @@ -3,8 +3,7 @@ menu "Firmware loader" config FW_LOADER tristate "Firmware loading facility" if EXPERT - select CRYPTO_HASH if FW_LOADER_DEBUG - select CRYPTO_SHA256 if FW_LOADER_DEBUG + select CRYPTO_LIB_SHA256 if FW_LOADER_DEBUG default y help This enables the firmware loading facility in the kernel. The kernel @@ -28,7 +27,6 @@ config FW_LOADER config FW_LOADER_DEBUG bool "Log filenames and checksums for loaded firmware" - depends on CRYPTO = FW_LOADER || CRYPTO=y depends on DYNAMIC_DEBUG depends on FW_LOADER default FW_LOADER @@ -40,7 +38,7 @@ config FW_LOADER_DEBUG config RUST_FW_LOADER_ABSTRACTIONS bool "Rust Firmware Loader abstractions" depends on RUST - depends on FW_LOADER=y + select FW_LOADER help This enables the Rust abstractions for the firmware loader API.
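Taken together, the faux API above reduces a consumer to one create call and one destroy call. A minimal sketch of such a consumer, assuming only the faux_device_create()/faux_device_destroy() signatures and struct faux_device_ops from the faux.c diff above; the "demo" device name, the probe body and the module boilerplate are hypothetical:

#include <linux/device.h>
#include <linux/device/faux.h>
#include <linux/module.h>

static int demo_probe(struct faux_device *fdev)
{
	/* Runs synchronously from within faux_device_create() */
	dev_info(&fdev->dev, "demo faux device bound\n");
	return 0;
}

static const struct faux_device_ops demo_ops = {
	.probe = demo_probe,
};

static struct faux_device *demo;

static int __init demo_init(void)
{
	/* NULL parent: the device shows up under the "faux" root in sysfs */
	demo = faux_device_create("demo", NULL, &demo_ops);
	return demo ? 0 : -ENODEV;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	faux_device_destroy(demo);
}
module_exit(demo_exit);

MODULE_DESCRIPTION("hypothetical faux bus consumer");
MODULE_LICENSE("GPL");

Note that faux_device_create() reports failure as NULL rather than an ERR_PTR, which is why the sketch maps the NULL case to -ENODEV.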
diff --git a/drivers/base/firmware_loader/builtin/main.c b/drivers/base/firmware_loader/builtin/main.c index a065c3150897..d36befebb1b9 100644 --- a/drivers/base/firmware_loader/builtin/main.c +++ b/drivers/base/firmware_loader/builtin/main.c @@ -61,7 +61,7 @@ bool firmware_request_builtin(struct firmware *fw, const char *name) return false; } -EXPORT_SYMBOL_NS_GPL(firmware_request_builtin, TEST_FIRMWARE); +EXPORT_SYMBOL_NS_GPL(firmware_request_builtin, "TEST_FIRMWARE"); /** * firmware_request_builtin_buf() - load builtin firmware into optional buffer diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index 8432ab2c3b3c..c8afc501a8a4 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -22,10 +22,10 @@ struct firmware_fallback_config fw_fallback_config = { .loading_timeout = 60, .old_timeout = 60, }; -EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE); +EXPORT_SYMBOL_NS_GPL(fw_fallback_config, "FIRMWARE_LOADER_PRIVATE"); #ifdef CONFIG_SYSCTL -static struct ctl_table firmware_config_table[] = { +static const struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", .data = &fw_fallback_config.force_sysfs_fallback, @@ -56,13 +56,13 @@ int register_firmware_config_sysctl(void) return -ENOMEM; return 0; } -EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE); +EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, "FIRMWARE_LOADER_PRIVATE"); void unregister_firmware_config_sysctl(void) { unregister_sysctl_table(firmware_config_sysct_table_header); firmware_config_sysct_table_header = NULL; } -EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE); +EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, "FIRMWARE_LOADER_PRIVATE"); #endif /* CONFIG_SYSCTL */ diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 324a9a3c087a..4ebdca9e4da4 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -806,42 +806,15 @@ static void fw_abort_batch_reqs(struct firmware *fw) } #if defined(CONFIG_FW_LOADER_DEBUG) -#include <crypto/hash.h> #include <crypto/sha2.h> static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) { - struct shash_desc *shash; - struct crypto_shash *alg; - u8 *sha256buf; - char *outbuf; + u8 digest[SHA256_DIGEST_SIZE]; - alg = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(alg)) - return; - - sha256buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); - outbuf = kmalloc(SHA256_BLOCK_SIZE + 1, GFP_KERNEL); - shash = kmalloc(sizeof(*shash) + crypto_shash_descsize(alg), GFP_KERNEL); - if (!sha256buf || !outbuf || !shash) - goto out_free; - - shash->tfm = alg; - - if (crypto_shash_digest(shash, fw->data, fw->size, sha256buf) < 0) - goto out_shash; - - for (int i = 0; i < SHA256_DIGEST_SIZE; i++) - sprintf(&outbuf[i * 2], "%02x", sha256buf[i]); - outbuf[SHA256_BLOCK_SIZE] = 0; - dev_dbg(device, "Loaded FW: %s, sha256: %s\n", name, outbuf); - -out_shash: - crypto_free_shash(alg); -out_free: - kfree(shash); - kfree(outbuf); - kfree(sha256buf); + sha256(fw->data, fw->size, digest); + dev_dbg(device, "Loaded FW: %s, sha256: %*phN\n", + name, SHA256_DIGEST_SIZE, digest); } #else static void fw_log_firmware_info(const struct firmware *fw, const char *name, @@ -849,26 +822,6 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, {} #endif -/* - * Reject 
firmware file names with ".." path components. - * There are drivers that construct firmware file names from device-supplied - * strings, and we don't want some device to be able to tell us "I would like to - * be sent my firmware from ../../../etc/shadow, please". - * - * Search for ".." surrounded by either '/' or start/end of string. - * - * This intentionally only looks at the firmware name, not at the firmware base - * directory or at symlink contents. - */ -static bool name_contains_dotdot(const char *name) -{ - size_t name_len = strlen(name); - - return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 || - strstr(name, "/../") != NULL || - (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0); -} - /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, @@ -876,8 +829,6 @@ _request_firmware(const struct firmware **firmware_p, const char *name, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; - struct cred *kern_cred = NULL; - const struct cred *old_cred; bool nondirect = false; int ret; @@ -889,6 +840,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } + + /* + * Reject firmware file names with ".." path components. + * There are drivers that construct firmware file names from + * device-supplied strings, and we don't want some device to be + * able to tell us "I would like to be sent my firmware from + * ../../../etc/shadow, please". + * + * This intentionally only looks at the firmware name, not at + * the firmware base directory or at symlink contents. + */ if (name_contains_dotdot(name)) { dev_warn(device, "Firmware load for '%s' refused, path contains '..' component\n", @@ -907,45 +869,38 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ - kern_cred = prepare_kernel_cred(&init_task); - if (!kern_cred) { - ret = -ENOMEM; - goto out; - } - old_cred = override_creds(kern_cred); - - ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); + scoped_with_kernel_creds() { + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); - /* Only full reads can support decompression, platform, and sysfs. */ - if (!(opt_flags & FW_OPT_PARTIAL)) - nondirect = true; + /* Only full reads can support decompression, platform, and sysfs. 
*/ + if (!(opt_flags & FW_OPT_PARTIAL)) + nondirect = true; #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD - if (ret == -ENOENT && nondirect) - ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", - fw_decompress_zstd); + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", + fw_decompress_zstd); #endif #ifdef CONFIG_FW_LOADER_COMPRESS_XZ - if (ret == -ENOENT && nondirect) - ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", - fw_decompress_xz); + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", + fw_decompress_xz); #endif - if (ret == -ENOENT && nondirect) - ret = firmware_fallback_platform(fw->priv); + if (ret == -ENOENT && nondirect) + ret = firmware_fallback_platform(fw->priv); - if (ret) { - if (!(opt_flags & FW_OPT_NO_WARN)) - dev_warn(device, - "Direct firmware load for %s failed with error %d\n", - name, ret); - if (nondirect) - ret = firmware_fallback_sysfs(fw, name, device, - opt_flags, ret); - } else - ret = assign_fw(fw, device); - - revert_creds(old_cred); - put_cred(kern_cred); + if (ret) { + if (!(opt_flags & FW_OPT_NO_WARN)) + dev_warn(device, + "Direct firmware load for %s failed with error %d\n", + name, ret); + if (nondirect) + ret = firmware_fallback_sysfs(fw, name, device, + opt_flags, ret); + } else { + ret = assign_fw(fw, device); + } + } out: if (ret < 0) { @@ -1075,8 +1030,8 @@ EXPORT_SYMBOL_GPL(firmware_request_platform); /** * firmware_request_cache() - cache firmware for suspend so resume can use it - * @name: name of firmware file * @device: device for which firmware should be cached for + * @name: name of firmware file * * There are some devices with an optimization that enables the device to not * require loading firmware on system reboot. 
This optimization may still @@ -1621,16 +1576,20 @@ static int fw_pm_notify(struct notifier_block *notify_block, } /* stop caching firmware once syscore_suspend is reached */ -static int fw_suspend(void) +static int fw_suspend(void *data) { fw_cache.state = FW_LOADER_NO_CACHE; return 0; } -static struct syscore_ops fw_syscore_ops = { +static const struct syscore_ops fw_syscore_ops = { .suspend = fw_suspend, }; +static struct syscore fw_syscore = { + .ops = &fw_syscore_ops, +}; + static int __init register_fw_pm_ops(void) { int ret; @@ -1646,14 +1605,14 @@ static int __init register_fw_pm_ops(void) if (ret) return ret; - register_syscore_ops(&fw_syscore_ops); + register_syscore(&fw_syscore); return ret; } static inline void unregister_fw_pm_ops(void) { - unregister_syscore_ops(&fw_syscore_ops); + unregister_syscore(&fw_syscore); unregister_pm_notifier(&fw_cache.pm_notify); } #else diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c index c9c93b47d9a5..92e91050f96a 100644 --- a/drivers/base/firmware_loader/sysfs.c +++ b/drivers/base/firmware_loader/sysfs.c @@ -47,7 +47,10 @@ static ssize_t timeout_show(const struct class *class, const struct class_attrib static ssize_t timeout_store(const struct class *class, const struct class_attribute *attr, const char *buf, size_t count) { - int tmp_loading_timeout = simple_strtol(buf, NULL, 10); + int tmp_loading_timeout; + + if (kstrtoint(buf, 10, &tmp_loading_timeout)) + return -EINVAL; if (tmp_loading_timeout < 0) tmp_loading_timeout = 0; @@ -157,7 +160,10 @@ static ssize_t firmware_loading_store(struct device *dev, struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); struct fw_priv *fw_priv; ssize_t written = count; - int loading = simple_strtol(buf, NULL, 10); + int loading; + + if (kstrtoint(buf, 10, &loading)) + return -EINVAL; mutex_lock(&fw_lock); fw_priv = fw_sysfs->fw_priv; @@ -259,7 +265,7 @@ static void firmware_rw(struct fw_priv *fw_priv, char *buffer, } static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -316,7 +322,7 @@ static int fw_realloc_pages(struct fw_sysfs *fw_sysfs, int min_size) * the driver as a firmware image. 
**/ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -356,7 +362,7 @@ out: return retval; } -static struct bin_attribute firmware_attr_data = { +static const struct bin_attribute firmware_attr_data = { .attr = { .name = "data", .mode = 0644 }, .size = 0, .read = firmware_data_read, @@ -374,7 +380,7 @@ static struct attribute *fw_dev_attrs[] = { NULL }; -static struct bin_attribute *fw_dev_bin_attrs[] = { +static const struct bin_attribute *const fw_dev_bin_attrs[] = { &firmware_attr_data, NULL }; diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h index 2060add8ef81..1cabea544a40 100644 --- a/drivers/base/firmware_loader/sysfs.h +++ b/drivers/base/firmware_loader/sysfs.h @@ -6,7 +6,7 @@ #include "firmware.h" -MODULE_IMPORT_NS(FIRMWARE_LOADER_PRIVATE); +MODULE_IMPORT_NS("FIRMWARE_LOADER_PRIVATE"); extern struct firmware_fallback_config fw_fallback_config; extern struct device_attribute dev_attr_loading; diff --git a/drivers/base/firmware_loader/sysfs_upload.c b/drivers/base/firmware_loader/sysfs_upload.c index 829270067d16..c3797b93c5f5 100644 --- a/drivers/base/firmware_loader/sysfs_upload.c +++ b/drivers/base/firmware_loader/sysfs_upload.c @@ -100,8 +100,10 @@ static ssize_t cancel_store(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&fwlp->lock); - if (fwlp->progress == FW_UPLOAD_PROG_IDLE) - ret = -ENODEV; + if (fwlp->progress == FW_UPLOAD_PROG_IDLE) { + mutex_unlock(&fwlp->lock); + return -ENODEV; + } fwlp->ops->cancel(fwlp->fw_upload); mutex_unlock(&fwlp->lock); diff --git a/drivers/base/init.c b/drivers/base/init.c index c4954835128c..9d2b06d65dfc 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -32,6 +32,7 @@ void __init driver_init(void) /* These are also core pieces, but must come after the * core core pieces. */ + faux_bus_init(); of_core_init(); platform_bus_init(); auxiliary_bus_init(); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 67858eeb92ed..751f248ca4a8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -22,6 +22,7 @@ #include <linux/stat.h> #include <linux/slab.h> #include <linux/xarray.h> +#include <linux/export.h> #include <linux/atomic.h> #include <linux/uaccess.h> @@ -48,22 +49,8 @@ int mhp_online_type_from_str(const char *str) #define to_memory_block(dev) container_of(dev, struct memory_block, dev) -static int sections_per_block; - -static inline unsigned long memory_block_id(unsigned long section_nr) -{ - return section_nr / sections_per_block; -} - -static inline unsigned long pfn_to_block_id(unsigned long pfn) -{ - return memory_block_id(pfn_to_section_nr(pfn)); -} - -static inline unsigned long phys_to_block_id(unsigned long phys) -{ - return pfn_to_block_id(PFN_DOWN(phys)); -} +int sections_per_block; +EXPORT_SYMBOL(sections_per_block); static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev); @@ -110,6 +97,57 @@ static void memory_block_release(struct device *dev) kfree(mem); } + +/* Max block size to be set by memory_block_advise_max_size */ +static unsigned long memory_block_advised_size; +static bool memory_block_advised_size_queried; + +/** + * memory_block_advise_max_size() - advise memory hotplug on the max suggested + * block size, usually for alignment. + * @size: suggestion for maximum block size. 
Must be a power of 2. + * + * Early boot software (pre-allocator init) may advise archs on the max block + * size. This value can only decrease after initialization, as the intent is + * to identify the largest supported alignment for all sources. + * + * Use of this value is arch-defined, as is min/max block size. + * + * Return: 0 on success + * -EINVAL if size is 0 or not a power of 2 + * -EBUSY if value has already been probed + */ +int __init memory_block_advise_max_size(unsigned long size) +{ + if (!size || !is_power_of_2(size)) + return -EINVAL; + + if (memory_block_advised_size_queried) + return -EBUSY; + + if (memory_block_advised_size) + memory_block_advised_size = min(memory_block_advised_size, size); + else + memory_block_advised_size = size; + + return 0; +} + +/** + * memory_block_advised_max_size() - query advised max hotplug block size. + * + * After the first call, the value can never change. Callers looking for the + * actual block size should use memory_block_size_bytes(). This interface is + * intended for use by arch-init when initializing the hotplug block size. + * + * Return: advised size in bytes, or 0 if never set. + */ +unsigned long memory_block_advised_max_size(void) +{ + memory_block_advised_size_queried = true; + return memory_block_advised_size; +} + unsigned long __weak memory_block_size_bytes(void) { return MIN_MEMORY_BLOCK_SIZE; @@ -160,15 +198,15 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, break; default: WARN_ON(1); - return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state); + return sysfs_emit(buf, "ERROR-UNKNOWN-%d\n", mem->state); } return sysfs_emit(buf, "%s\n", output); } -int memory_notify(unsigned long val, void *v) +int memory_notify(enum memory_block_state state, void *v) { - return blocking_notifier_call_chain(&memory_chain, val, v); + return blocking_notifier_call_chain(&memory_chain, state, v); } #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) @@ -188,7 +226,6 @@ static int memory_block_online(struct memory_block *mem) unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; unsigned long nr_vmemmap_pages = 0; - struct memory_notify arg; struct zone *zone; int ret; @@ -208,19 +245,9 @@ static int memory_block_online(struct memory_block *mem) if (mem->altmap) nr_vmemmap_pages = mem->altmap->free; - arg.altmap_start_pfn = start_pfn; - arg.altmap_nr_pages = nr_vmemmap_pages; - arg.start_pfn = start_pfn + nr_vmemmap_pages; - arg.nr_pages = nr_pages - nr_vmemmap_pages; mem_hotplug_begin(); - ret = memory_notify(MEM_PREPARE_ONLINE, &arg); - ret = notifier_to_errno(ret); - if (ret) - goto out_notifier; - if (nr_vmemmap_pages) { - ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, - zone, mem->altmap->inaccessible); + ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) goto out; } @@ -242,11 +269,7 @@ static int memory_block_online(struct memory_block *mem) nr_vmemmap_pages); mem->zone = zone; - mem_hotplug_done(); - return ret; out: - memory_notify(MEM_FINISH_OFFLINE, &arg); -out_notifier: mem_hotplug_done(); return ret; } @@ -259,7 +282,6 @@ static int memory_block_offline(struct memory_block *mem) unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; unsigned long nr_vmemmap_pages = 0; - struct memory_notify arg; int ret; if (!mem->zone) @@ -291,11 +313,6 @@ static int memory_block_offline(struct
memory_block *mem) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); mem->zone = NULL; - arg.altmap_start_pfn = start_pfn; - arg.altmap_nr_pages = nr_vmemmap_pages; - arg.start_pfn = start_pfn + nr_vmemmap_pages; - arg.nr_pages = nr_pages - nr_vmemmap_pages; - memory_notify(MEM_FINISH_OFFLINE, &arg); out: mem_hotplug_done(); return ret; @@ -455,7 +472,7 @@ static ssize_t valid_zones_show(struct device *dev, struct memory_group *group = mem->group; struct zone *default_zone; int nid = mem->nid; - int len = 0; + int len; /* * Check the existing zone. Make sure that we do that only on the @@ -466,22 +483,18 @@ static ssize_t valid_zones_show(struct device *dev, * If !mem->zone, the memory block spans multiple zones and * cannot get offlined. */ - default_zone = mem->zone; - if (!default_zone) - return sysfs_emit(buf, "%s\n", "none"); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); - goto out; + return sysfs_emit(buf, "%s\n", + mem->zone ? mem->zone->name : "none"); } default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group, start_pfn, nr_pages); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); + len = sysfs_emit(buf, "%s", default_zone->name); len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, default_zone); len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, default_zone); -out: len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -512,7 +525,7 @@ static ssize_t auto_online_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - online_type_to_str[mhp_default_online_type]); + online_type_to_str[mhp_get_default_online_type()]); } static ssize_t auto_online_blocks_store(struct device *dev, @@ -524,7 +537,7 @@ static ssize_t auto_online_blocks_store(struct device *dev, if (online_type < 0) return -EINVAL; - mhp_default_online_type = online_type; + mhp_set_default_online_type(online_type); return count; } @@ -636,7 +649,7 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) * * Called under device_hotplug_lock. */ -static struct memory_block *find_memory_block_by_id(unsigned long block_id) +struct memory_block *find_memory_block_by_id(unsigned long block_id) { struct memory_block *mem; @@ -735,21 +748,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem, #ifdef CONFIG_NUMA /** - * memory_block_add_nid() - Indicate that system RAM falling into this memory - * block device (partially) belongs to the given node. + * memory_block_add_nid_early() - Indicate that early system RAM falling into + * this memory block device (partially) belongs + * to the given node. * @mem: The memory block device. * @nid: The node id. - * @context: The memory initialization context. * - * Indicate that system RAM falling into this memory block (partially) belongs - * to the given node. If the context indicates ("early") that we are adding the - * node during node device subsystem initialization, this will also properly - * set/adjust mem->zone based on the zone ranges of the given node. + * Indicate that early system RAM falling into this memory block (partially) + * belongs to the given node. This will also properly set/adjust mem->zone based + * on the zone ranges of the given node. + * + * Memory hotplug handles this on memory block creation, where we can only have + * a single nid span a memory block. 
*/ -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context) +void memory_block_add_nid_early(struct memory_block *mem, int nid) { - if (context == MEMINIT_EARLY && mem->nid != nid) { + if (mem->nid != nid) { /* * For early memory we have to determine the zone when setting * the node id and handle multiple nodes spanning a single @@ -763,19 +777,18 @@ void memory_block_add_nid(struct memory_block *mem, int nid, mem->zone = early_node_zone_for_memory_block(mem, nid); else mem->zone = NULL; + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. If we span multiple nodes (not applicable + * to hotplugged memory), zone == NULL will prohibit memory offlining + * and consequently unplug. + */ + mem->nid = nid; } - - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. If we span multiple nodes (not applicable - * to hotplugged memory), zone == NULL will prohibit memory offlining - * and consequently unplug. - */ - mem->nid = nid; } #endif -static int add_memory_block(unsigned long block_id, unsigned long state, +static int add_memory_block(unsigned long block_id, int nid, unsigned long state, struct vmem_altmap *altmap, struct memory_group *group) { @@ -793,7 +806,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state, mem->start_section_nr = block_id * sections_per_block; mem->state = state; - mem->nid = NUMA_NO_NODE; + mem->nid = nid; mem->altmap = altmap; INIT_LIST_HEAD(&mem->group_next); @@ -820,29 +833,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int __init add_boot_memory_block(unsigned long base_section_nr) -{ - int section_count = 0; - unsigned long nr; - - for (nr = base_section_nr; nr < base_section_nr + sections_per_block; - nr++) - if (present_section_nr(nr)) - section_count++; - - if (section_count == 0) - return 0; - return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); -} - -static int add_hotplug_memory_block(unsigned long block_id, - struct vmem_altmap *altmap, - struct memory_group *group) -{ - return add_memory_block(block_id, MEM_OFFLINE, altmap, group); -} - static void remove_memory_block(struct memory_block *memory) { if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) @@ -868,7 +858,7 @@ static void remove_memory_block(struct memory_block *memory) * Called under device_hotplug_lock. 
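 * * A minimal call sketch (illustrative; the new @nid parameter names the single * node that the hotplugged range belongs to): * * ret = create_memory_block_devices(start, size, nid, NULL, group); * if (ret) * return ret;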
*/ int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group) { const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); @@ -882,7 +872,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = add_hotplug_memory_block(block_id, altmap, group); + ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group); if (ret) break; } @@ -962,7 +952,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -975,15 +965,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. */ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_boot_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } diff --git a/drivers/base/module.c b/drivers/base/module.c index 5bc71bea883a..218aaa096455 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } diff --git a/drivers/base/node.c b/drivers/base/node.c index eb72580288e6..00cf4532f121 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/memory.h> +#include <linux/mempolicy.h> #include <linux/vmstat.h> #include <linux/notifier.h> #include <linux/node.h> @@ -20,6 +21,7 @@ #include <linux/pm_runtime.h> #include <linux/swap.h> #include <linux/slab.h> +#include <linux/memblock.h> static const struct bus_type node_subsys = { .name = "node", @@ -27,7 +29,7 @@ static const struct bus_type node_subsys = { }; static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct 
device *dev = kobj_to_dev(kobj); @@ -45,10 +47,10 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -66,7 +68,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); /** * struct node_access_nodes - Access class device to hold user visible @@ -110,6 +112,27 @@ static const struct attribute_group *node_access_node_groups[] = { NULL, }; +#ifdef CONFIG_MEMORY_HOTPLUG +static BLOCKING_NOTIFIER_HEAD(node_chain); + +int register_node_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&node_chain, nb); +} +EXPORT_SYMBOL(register_node_notifier); + +void unregister_node_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&node_chain, nb); +} +EXPORT_SYMBOL(unregister_node_notifier); + +int node_notify(unsigned long val, void *v) +{ + return blocking_notifier_call_chain(&node_chain, val, v); +} +#endif + static void node_remove_accesses(struct node *node) { struct node_access_nodes *c, *cnext; @@ -214,10 +237,56 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, break; } } + + /* When setting CPU access coordinates, update mempolicy */ + if (access == ACCESS_COORDINATE_CPU) { + if (mempolicy_set_node_perf(nid, coord)) { + pr_info("failed to set mempolicy attrs for node %d\n", + nid); + } + } } EXPORT_SYMBOL_GPL(node_set_perf_attrs); /** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + +/** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device representing the cache level * @node: List element for tracking in the node @@ -244,12 +313,14 @@ CACHE_ATTR(size, "%llu") CACHE_ATTR(line_size, "%u") CACHE_ATTR(indexing, "%u") CACHE_ATTR(write_policy, "%u") +CACHE_ATTR(address_mode, "%#x") static struct attribute *cache_attrs[] = { &dev_attr_indexing.attr, &dev_attr_size.attr, &dev_attr_line_size.attr, &dev_attr_write_policy.attr, + &dev_attr_address_mode.attr, NULL, }; ATTRIBUTE_GROUPS(cache); @@ -466,8 +537,8 @@ static ssize_t node_read_meminfo(struct device *dev, nid,
K(node_page_state(pgdat, NR_PAGETABLE)), nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), nid, 0UL, - nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), + nid, 0UL, + nid, 0UL, nid, K(sreclaimable + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), nid, K(sreclaimable + sunreclaimable), @@ -578,7 +649,7 @@ static struct attribute *node_dev_attrs[] = { NULL }; -static struct bin_attribute *node_dev_bin_attrs[] = { +static const struct bin_attribute *node_dev_bin_attrs[] = { &bin_attr_cpumap, &bin_attr_cpulist, NULL @@ -586,7 +657,7 @@ static struct bin_attribute *node_dev_bin_attrs[] = { static const struct attribute_group node_dev_group = { .attrs = node_dev_attrs, - .bin_attrs = node_dev_bin_attrs + .bin_attrs = node_dev_bin_attrs, }; static const struct attribute_group *node_dev_groups[] = { @@ -605,48 +676,6 @@ static void node_device_release(struct device *dev) kfree(to_node(dev)); } -/* - * register_node - Setup a sysfs device for a node. - * @num - Node number to use when creating the device. - * - * Initialize and register the node device. - */ -static int register_node(struct node *node, int num) -{ - int error; - - node->dev.id = num; - node->dev.bus = &node_subsys; - node->dev.release = node_device_release; - node->dev.groups = node_dev_groups; - error = device_register(&node->dev); - - if (error) { - put_device(&node->dev); - } else { - hugetlb_register_node(node); - compaction_register_node(node); - } - - return error; -} - -/** - * unregister_node - unregister a node device - * @node: node going away - * - * Unregisters a node device @node. All the devices on the node must be - * unregistered before calling this function. - */ -void unregister_node(struct node *node) -{ - hugetlb_unregister_node(node); - compaction_unregister_node(node); - node_remove_accesses(node); - node_remove_caches(node); - device_unregister(&node->dev); -} - struct node *node_devices[MAX_NUMNODES]; /* @@ -745,23 +774,11 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) } #ifdef CONFIG_MEMORY_HOTPLUG -static int __ref get_nid_for_pfn(unsigned long pfn) -{ -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT - if (system_state < SYSTEM_RUNNING) - return early_pfn_to_nid(pfn); -#endif - return pfn_to_nid(pfn); -} - static void do_register_memory_block_under_node(int nid, - struct memory_block *mem_blk, - enum meminit_context context) + struct memory_block *mem_blk) { int ret; - memory_block_add_nid(mem_blk, nid, context); - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); @@ -780,46 +797,6 @@ static void do_register_memory_block_under_node(int nid, ret); } -/* register memory section under specified node if it spans that node */ -static int register_mem_block_under_node_early(struct memory_block *mem_blk, - void *arg) -{ - unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; - unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); - unsigned long end_pfn = start_pfn + memory_block_pfns - 1; - int nid = *(int *)arg; - unsigned long pfn; - - for (pfn = start_pfn; pfn <= end_pfn; pfn++) { - int page_nid; - - /* - * memory block could have several absent sections from start. 
- * skip pfn range from absent section - */ - if (!pfn_in_present_section(pfn)) { - pfn = round_down(pfn + PAGES_PER_SECTION, - PAGES_PER_SECTION) - 1; - continue; - } - - /* - * We need to check if page belongs to nid only at the boot - * case because node's ranges can be interleaved. - */ - page_nid = get_nid_for_pfn(pfn); - if (page_nid < 0) - continue; - if (page_nid != nid) - continue; - - do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY); - return 0; - } - /* mem section does not span the specified node */ - return 0; -} - /* * During hotplug we know that all pages in the memory block belong to the same * node. @@ -829,7 +806,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, { int nid = *(int *)arg; - do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG); + do_register_memory_block_under_node(nid, mem_blk); return 0; } @@ -848,24 +825,51 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -void register_memory_blocks_under_node(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +/* register all memory blocks under the corresponding nodes */ +static void register_memory_blocks_under_nodes(void) { - walk_memory_blocks_func_t func; + struct memblock_region *r; + + for_each_mem_region(r) { + const unsigned long start_block_id = phys_to_block_id(r->base); + const unsigned long end_block_id = phys_to_block_id(r->base + r->size - 1); + const int nid = memblock_get_region_node(r); + unsigned long block_id; + + if (!node_online(nid)) + continue; + + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { + struct memory_block *mem; + + mem = find_memory_block_by_id(block_id); + if (!mem) + continue; + + memory_block_add_nid_early(mem, nid); + do_register_memory_block_under_node(nid, mem); + put_device(&mem->dev); + } - if (context == MEMINIT_HOTPLUG) - func = register_mem_block_under_node_hotplug; - else - func = register_mem_block_under_node_early; + } +} +void register_memory_blocks_under_node_hotplug(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), - (void *)&nid, func); + (void *)&nid, register_mem_block_under_node_hotplug); return; } #endif /* CONFIG_MEMORY_HOTPLUG */ -int __register_one_node(int nid) +/** + * register_node - Initialize and register the node device. + * @nid: Node number to use when creating the device. + * + * Return: 0 on success, -errno otherwise + */ +int register_node(int nid) { int error; int cpu; @@ -876,9 +880,22 @@ int __register_one_node(int nid) return -ENOMEM; INIT_LIST_HEAD(&node->access_list); - node_devices[nid] = node; - error = register_node(node_devices[nid], nid); + node->dev.id = nid; + node->dev.bus = &node_subsys; + node->dev.release = node_device_release; + node->dev.groups = node_dev_groups; + + error = device_register(&node->dev); + if (error) { + put_device(&node->dev); + return error; + } + + node_devices[nid] = node; + hugetlb_register_node(node); + compaction_register_node(node); + reclaim_register_node(node); /* link cpu under this node */ for_each_present_cpu(cpu) { @@ -890,13 +907,26 @@ int __register_one_node(int nid) return error; } - -void unregister_one_node(int nid) +/** + * unregister_node - unregister a node device + * @nid: nid of the node going away + * + * Unregisters the node device at node id @nid. 
All the devices on the + * node must be unregistered before calling this function. + */ +void unregister_node(int nid) { - if (!node_devices[nid]) + struct node *node = node_devices[nid]; + + if (!node) return; - unregister_node(node_devices[nid]); + hugetlb_unregister_node(node); + compaction_unregister_node(node); + reclaim_unregister_node(node); + node_remove_accesses(node); + node_remove_caches(node); + device_unregister(&node->dev); node_devices[nid] = NULL; } @@ -969,11 +999,13 @@ void __init node_dev_init(void) /* * Create all node devices, which will properly link the node - * to applicable memory block devices and already created cpu devices. + * to already created cpu devices. */ for_each_online_node(i) { - ret = register_one_node(i); + ret = register_node(i); if (ret) panic("%s() failed to add node: %d\n", __func__, ret); } + + register_memory_blocks_under_nodes(); } diff --git a/drivers/base/physical_location.c b/drivers/base/physical_location.c index 951819e71b4a..a5539e294d4d 100644 --- a/drivers/base/physical_location.c +++ b/drivers/base/physical_location.c @@ -7,19 +7,18 @@ #include <linux/acpi.h> #include <linux/sysfs.h> +#include <linux/string_choices.h> #include "physical_location.h" bool dev_add_physical_location(struct device *dev) { struct acpi_pld_info *pld; - acpi_status status; if (!has_acpi_companion(dev)) return false; - status = acpi_get_physical_device_location(ACPI_HANDLE(dev), &pld); - if (ACPI_FAILURE(status)) + if (!acpi_get_physical_device_location(ACPI_HANDLE(dev), &pld)) return false; dev->physical_location = @@ -118,7 +117,7 @@ static ssize_t dock_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->dock ? "yes" : "no"); + str_yes_no(dev->physical_location->dock)); } static DEVICE_ATTR_RO(dock); @@ -126,7 +125,7 @@ static ssize_t lid_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->lid ? 
"yes" : "no"); + str_yes_no(dev->physical_location->lid)); } static DEVICE_ATTR_RO(lid); diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 0e60dd650b5e..70db08f3ac6f 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -95,5 +95,6 @@ EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs); void platform_device_msi_free_irqs_all(struct device *dev) { msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); + msi_remove_device_irq_domain(dev, MSI_DEFAULT_DOMAIN); } EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 6f2a33722c52..b45d41b018ca 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -150,25 +150,37 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); #endif /* CONFIG_HAS_IOMEM */ +static const struct cpumask *get_irq_affinity(struct platform_device *dev, + unsigned int num) +{ + const struct cpumask *mask = NULL; +#ifndef CONFIG_SPARC + struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); + + if (is_of_node(fwnode)) + mask = of_irq_get_affinity(to_of_node(fwnode), num); + else if (is_acpi_device_node(fwnode)) + mask = acpi_irq_get_affinity(ACPI_HANDLE_FWNODE(fwnode), num); +#endif + + return mask ?: cpu_possible_mask; +} + /** - * platform_get_irq_optional - get an optional IRQ for a device - * @dev: platform device - * @num: IRQ number index + * platform_get_irq_affinity - get an optional IRQ and its affinity for a device + * @dev: platform device + * @num: interrupt number index + * @affinity: optional cpumask pointer to get the affinity of a per-cpu interrupt * - * Gets an IRQ for a platform device. Device drivers should check the return - * value for errors so as to not pass a negative integer value to the - * request_irq() APIs. This is the same as platform_get_irq(), except that it - * does not print an error message if an IRQ can not be obtained. + * Gets an interupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. Optional affinity information is provided in the + * affinity pointer if available, and NULL otherwise. * - * For example:: - * - * int irq = platform_get_irq_optional(pdev, 0); - * if (irq < 0) - * return irq; - * - * Return: non-zero IRQ number on success, negative error number on failure. + * Return: non-zero interrupt number on success, negative error number on failure. */ -int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +int platform_get_irq_affinity(struct platform_device *dev, unsigned int num, + const struct cpumask **affinity) { int ret; #ifdef CONFIG_SPARC @@ -236,8 +248,37 @@ out_not_found: out: if (WARN(!ret, "0 is an invalid IRQ number\n")) return -EINVAL; + + if (ret > 0 && affinity) + *affinity = get_irq_affinity(dev, num); + return ret; } +EXPORT_SYMBOL_GPL(platform_get_irq_affinity); + +/** + * platform_get_irq_optional - get an optional interrupt for a device + * @dev: platform device + * @num: interrupt number index + * + * Gets an interrupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. This is the same as platform_get_irq(), except + * that it does not print an error message if an interrupt can not be + * obtained. 
+ * + * For example:: + * + * int irq = platform_get_irq_optional(pdev, 0); + * if (irq < 0) + * return irq; + * + * Return: non-zero interrupt number on success, negative error number on failure. + */ +int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +{ + return platform_get_irq_affinity(dev, num, NULL); +} EXPORT_SYMBOL_GPL(platform_get_irq_optional); /** @@ -982,7 +1023,7 @@ struct platform_device * __init_or_module __platform_create_bundle( struct platform_device *pdev; int error; - pdev = platform_device_alloc(driver->driver.name, -1); + pdev = platform_device_alloc(driver->driver.name, PLATFORM_DEVID_NONE); if (!pdev) { error = -ENOMEM; goto err_out; @@ -1396,15 +1437,13 @@ static int platform_probe(struct device *_dev) if (ret < 0) return ret; - ret = dev_pm_domain_attach(_dev, true); + ret = dev_pm_domain_attach(_dev, PD_FLAG_ATTACH_POWER_ON | + PD_FLAG_DETACH_POWER_OFF); if (ret) goto out; - if (drv->probe) { + if (drv->probe) ret = drv->probe(dev); - if (ret) - dev_pm_domain_detach(_dev, true); - } out: if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { @@ -1422,7 +1461,6 @@ static void platform_remove(struct device *_dev) if (drv->remove) drv->remove(dev); - dev_pm_domain_detach(_dev, true); } static void platform_shutdown(struct device *_dev) @@ -1440,7 +1478,7 @@ static void platform_shutdown(struct device *_dev) static int platform_dma_configure(struct device *dev) { - struct platform_driver *drv = to_platform_driver(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; @@ -1451,7 +1489,8 @@ static int platform_dma_configure(struct device *dev) attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } - if (ret || drv->driver_managed_dma) + /* @dev->driver may not be valid when we're called from the IOMMU layer */ + if (ret || !drv || to_platform_driver(drv)->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 01f11629d241..2989e42d0161 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o wakeup_stats.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o obj-$(CONFIG_PM_QOS_KUNIT_TEST) += qos-test.o +obj-$(CONFIG_PM_RUNTIME_KUNIT_TEST) += runtime-test.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index e18ba676cdf6..b69bcb37c830 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -259,39 +259,6 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk) } EXPORT_SYMBOL_GPL(pm_clk_add_clk); - -/** - * of_pm_clk_add_clk - Start using a device clock for power management. - * @dev: Device whose clock is going to be used for power management. - * @name: Name of clock that is going to be used for power management. - * - * Add the clock described in the 'clocks' device-tree node that matches - * with the 'name' provided, to the list of clocks used for the power - * management of @dev. On success, returns 0. Returns a negative error - * code if the clock is not found or cannot be added. 
- */ -int of_pm_clk_add_clk(struct device *dev, const char *name) -{ - struct clk *clk; - int ret; - - if (!dev || !dev->of_node || !name) - return -EINVAL; - - clk = of_clk_get_by_name(dev->of_node, name); - if (IS_ERR(clk)) - return PTR_ERR(clk); - - ret = pm_clk_add_clk(dev, clk); - if (ret) { - clk_put(clk); - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); - /** * of_pm_clk_add_clks - Start using device clock(s) for power management. * @dev: Device whose clock(s) is going to be used for power management. @@ -377,46 +344,6 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) } /** - * pm_clk_remove - Stop using a device clock for power management. - * @dev: Device whose clock should not be used for PM any more. - * @con_id: Connection ID of the clock. - * - * Remove the clock represented by @con_id from the list of clocks used for - * the power management of @dev. - */ -void pm_clk_remove(struct device *dev, const char *con_id) -{ - struct pm_subsys_data *psd = dev_to_psd(dev); - struct pm_clock_entry *ce; - - if (!psd) - return; - - pm_clk_list_lock(psd); - - list_for_each_entry(ce, &psd->clock_list, node) { - if (!con_id && !ce->con_id) - goto remove; - else if (!con_id || !ce->con_id) - continue; - else if (!strcmp(con_id, ce->con_id)) - goto remove; - } - - pm_clk_list_unlock(psd); - return; - - remove: - list_del(&ce->node); - if (ce->enabled_when_prepared) - psd->clock_op_might_sleep--; - pm_clk_list_unlock(psd); - - __pm_clk_remove(ce); -} -EXPORT_SYMBOL_GPL(pm_clk_remove); - -/** * pm_clk_remove_clk - Stop using a device clock for power management. * @dev: Device whose clock should not be used for PM any more. * @clk: Clock pointer diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 781968a128ff..6ecf9ce4a4e6 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); /** * dev_pm_domain_attach - Attach a device to its PM domain. * @dev: Device to attach. - * @power_on: Used to indicate whether we should power on the device. + * @flags: indicate whether we should power on/off the device on attach/detach * * The @dev may only be attached to a single PM domain. By iterating through * the available alternatives we try to find a valid PM domain for the device. @@ -100,17 +100,20 @@ EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); * Returns 0 on successfully attached PM domain, or when it is found that the * device doesn't need a PM domain, else a negative error code. */ -int dev_pm_domain_attach(struct device *dev, bool power_on) +int dev_pm_domain_attach(struct device *dev, u32 flags) { int ret; if (dev->pm_domain) return 0; - ret = acpi_dev_pm_attach(dev, power_on); + ret = acpi_dev_pm_attach(dev, !!(flags & PD_FLAG_ATTACH_POWER_ON)); if (!ret) ret = genpd_dev_pm_attach(dev); + if (dev->pm_domain) + dev->power.detach_power_off = !!(flags & PD_FLAG_DETACH_POWER_OFF); + return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach); diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 4fa525668cb7..af99bbcf281c 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -8,6 +8,13 @@ #include <linux/pm_runtime.h> #include <linux/export.h> +#define CALL_PM_OP(dev, op) \ +({ \ + struct device *_dev = (dev); \ + const struct dev_pm_ops *pm = _dev->driver ? _dev->driver->pm : NULL; \ + pm && pm->op ? 
pm->op(_dev) : 0; \ +}) + #ifdef CONFIG_PM /** * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems. @@ -19,12 +26,7 @@ */ int pm_generic_runtime_suspend(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int ret; - - ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0; - - return ret; + return CALL_PM_OP(dev, runtime_suspend); } EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend); @@ -38,12 +40,7 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend); */ int pm_generic_runtime_resume(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int ret; - - ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0; - - return ret; + return CALL_PM_OP(dev, runtime_resume); } EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); #endif /* CONFIG_PM */ @@ -72,9 +69,7 @@ int pm_generic_prepare(struct device *dev) */ int pm_generic_suspend_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0; + return CALL_PM_OP(dev, suspend_noirq); } EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); @@ -84,9 +79,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); */ int pm_generic_suspend_late(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->suspend_late ? pm->suspend_late(dev) : 0; + return CALL_PM_OP(dev, suspend_late); } EXPORT_SYMBOL_GPL(pm_generic_suspend_late); @@ -96,9 +89,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_late); */ int pm_generic_suspend(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->suspend ? pm->suspend(dev) : 0; + return CALL_PM_OP(dev, suspend); } EXPORT_SYMBOL_GPL(pm_generic_suspend); @@ -108,33 +99,17 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend); */ int pm_generic_freeze_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0; + return CALL_PM_OP(dev, freeze_noirq); } EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq); /** - * pm_generic_freeze_late - Generic freeze_late callback for subsystems. - * @dev: Device to freeze. - */ -int pm_generic_freeze_late(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze_late ? pm->freeze_late(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_freeze_late); - -/** * pm_generic_freeze - Generic freeze callback for subsystems. * @dev: Device to freeze. */ int pm_generic_freeze(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze ? pm->freeze(dev) : 0; + return CALL_PM_OP(dev, freeze); } EXPORT_SYMBOL_GPL(pm_generic_freeze); @@ -144,9 +119,7 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze); */ int pm_generic_poweroff_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0; + return CALL_PM_OP(dev, poweroff_noirq); } EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq); @@ -156,9 +129,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq); */ int pm_generic_poweroff_late(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff_late ? 
pm->poweroff_late(dev) : 0; + return CALL_PM_OP(dev, poweroff_late); } EXPORT_SYMBOL_GPL(pm_generic_poweroff_late); @@ -168,9 +139,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_late); */ int pm_generic_poweroff(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff ? pm->poweroff(dev) : 0; + return CALL_PM_OP(dev, poweroff); } EXPORT_SYMBOL_GPL(pm_generic_poweroff); @@ -180,33 +149,17 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff); */ int pm_generic_thaw_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0; + return CALL_PM_OP(dev, thaw_noirq); } EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq); /** - * pm_generic_thaw_early - Generic thaw_early callback for subsystems. - * @dev: Device to thaw. - */ -int pm_generic_thaw_early(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw_early ? pm->thaw_early(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_thaw_early); - -/** * pm_generic_thaw - Generic thaw callback for subsystems. * @dev: Device to thaw. */ int pm_generic_thaw(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw ? pm->thaw(dev) : 0; + return CALL_PM_OP(dev, thaw); } EXPORT_SYMBOL_GPL(pm_generic_thaw); @@ -216,9 +169,7 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw); */ int pm_generic_resume_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0; + return CALL_PM_OP(dev, resume_noirq); } EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); @@ -228,9 +179,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); */ int pm_generic_resume_early(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume_early ? pm->resume_early(dev) : 0; + return CALL_PM_OP(dev, resume_early); } EXPORT_SYMBOL_GPL(pm_generic_resume_early); @@ -240,9 +189,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_early); */ int pm_generic_resume(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume ? pm->resume(dev) : 0; + return CALL_PM_OP(dev, resume); } EXPORT_SYMBOL_GPL(pm_generic_resume); @@ -252,9 +199,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume); */ int pm_generic_restore_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0; + return CALL_PM_OP(dev, restore_noirq); } EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); @@ -264,9 +209,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); */ int pm_generic_restore_early(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore_early ? pm->restore_early(dev) : 0; + return CALL_PM_OP(dev, restore_early); } EXPORT_SYMBOL_GPL(pm_generic_restore_early); @@ -276,9 +219,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_early); */ int pm_generic_restore(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore ? 
pm->restore(dev) : 0; + return CALL_PM_OP(dev, restore); } EXPORT_SYMBOL_GPL(pm_generic_restore); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 4a67e83300e1..97a8b4fcf471 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -34,16 +34,13 @@ #include <linux/cpufreq.h> #include <linux/devfreq.h> #include <linux/timer.h> +#include <linux/nmi.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); -#define list_for_each_entry_rcu_locked(pos, head, member) \ - list_for_each_entry_rcu(pos, head, member, \ - device_links_read_lock_held()) - /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and @@ -63,8 +60,23 @@ static LIST_HEAD(dpm_noirq_list); static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; +static DEFINE_MUTEX(async_wip_mtx); static int async_error; +/** + * pm_hibernate_is_recovering - check if recovering from a hibernation error. + * + * Used to query whether dev_pm_ops.thaw() is being called for the normal + * hibernation case or while recovering from an error. + * + * Return: true for the error-recovery case, false for the normal case. + */ +bool pm_hibernate_is_recovering(void) +{ + return pm_transition.event == PM_EVENT_RECOVER; +} +EXPORT_SYMBOL_GPL(pm_hibernate_is_recovering); + static const char *pm_verb(int event) { switch (event) { @@ -84,6 +96,8 @@ static const char *pm_verb(int event) return "restore"; case PM_EVENT_RECOVER: return "recover"; + case PM_EVENT_POWEROFF: + return "poweroff"; default: return "(unknown PM event)"; } @@ -249,7 +263,7 @@ static int dpm_wait_fn(struct device *dev, void *async_ptr) static void dpm_wait_for_children(struct device *dev, bool async) { - device_for_each_child(dev, &async, dpm_wait_fn); + device_for_each_child(dev, &async, dpm_wait_fn); } static void dpm_wait_for_suppliers(struct device *dev, bool async) @@ -266,8 +280,9 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) * callbacks freeing the link objects for the links in the list we're * walking. */ - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) - if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dev_for_each_link_to_supplier(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT && + !device_link_flag_is_sync_state_only(link->flags)) dpm_wait(link->supplier, async); device_links_read_unlock(idx); @@ -323,8 +338,9 @@ static void dpm_wait_for_consumers(struct device *dev, bool async) * continue instead of trying to continue in parallel with its * unregistration).
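 * * (This wait on consumers is the suspend-path counterpart of the wait on * suppliers in dpm_wait_for_suppliers() above, which the resume path uses.)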
*/ - list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) - if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dev_for_each_link_to_consumer(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT && + !device_link_flag_is_sync_state_only(link->flags)) dpm_wait(link->consumer, async); device_links_read_unlock(idx); @@ -354,6 +370,7 @@ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff; case PM_EVENT_THAW: @@ -388,6 +405,7 @@ static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops, case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_late; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff_late; case PM_EVENT_THAW: @@ -422,6 +440,7 @@ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t stat case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_noirq; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff_noirq; case PM_EVENT_THAW: @@ -496,11 +515,17 @@ struct dpm_watchdog { struct device *dev; struct task_struct *tsk; struct timer_list timer; + bool fatal; }; #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ struct dpm_watchdog wd +static bool __read_mostly dpm_watchdog_all_cpu_backtrace; +module_param(dpm_watchdog_all_cpu_backtrace, bool, 0644); +MODULE_PARM_DESC(dpm_watchdog_all_cpu_backtrace, + "Backtrace all CPUs on DPM watchdog timeout"); + /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. * @t: The timer that PM watchdog depends on. @@ -511,12 +536,28 @@ struct dpm_watchdog { */ static void dpm_watchdog_handler(struct timer_list *t) { - struct dpm_watchdog *wd = from_timer(wd, t, timer); + struct dpm_watchdog *wd = timer_container_of(wd, t, timer); + struct timer_list *timer = &wd->timer; + unsigned int time_left; + + if (wd->fatal) { + unsigned int this_cpu = smp_processor_id(); + + dev_emerg(wd->dev, "**** DPM device timeout ****\n"); + show_stack(wd->tsk, NULL, KERN_EMERG); + if (dpm_watchdog_all_cpu_backtrace) + trigger_allbutcpu_cpu_backtrace(this_cpu); + panic("%s %s: unrecoverable failure\n", + dev_driver_string(wd->dev), dev_name(wd->dev)); + } + + time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; + dev_warn(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n", + CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left); + show_stack(wd->tsk, NULL, KERN_WARNING); - dev_emerg(wd->dev, "**** DPM device timeout ****\n"); - show_stack(wd->tsk, NULL, KERN_EMERG); - panic("%s %s: unrecoverable failure\n", - dev_driver_string(wd->dev), dev_name(wd->dev)); + wd->fatal = true; + mod_timer(timer, jiffies + HZ * time_left); } /** @@ -530,10 +571,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) wd->dev = dev; wd->tsk = current; + wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; timer_setup_on_stack(timer, dpm_watchdog_handler, 0); /* use same timeout value for both suspend and resume */ - timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; + timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; add_timer(timer); } @@ -545,8 +587,8 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) { struct timer_list *timer = &wd->timer; - del_timer_sync(timer); - destroy_timer_on_stack(timer); + timer_delete_sync(timer); + timer_destroy_on_stack(timer); } #else #define 
DECLARE_DPM_WATCHDOG_ON_STACK(wd) @@ -583,29 +625,96 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } +static bool __dpm_async(struct device *dev, async_func_t func) +{ + if (dev->power.work_in_progress) + return true; + + if (!is_async(dev)) + return false; + + dev->power.work_in_progress = true; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) + return true; + + put_device(dev); + + return false; +} + static bool dpm_async_fn(struct device *dev, async_func_t func) { - reinit_completion(&dev->power.completion); + guard(mutex)(&async_wip_mtx); - if (is_async(dev)) { - dev->power.async_in_progress = true; + return __dpm_async(dev, func); +} - get_device(dev); +static int dpm_async_with_cleanup(struct device *dev, void *fn) +{ + guard(mutex)(&async_wip_mtx); - if (async_schedule_dev_nocall(func, dev)) - return true; + if (!__dpm_async(dev, fn)) + dev->power.work_in_progress = false; - put_device(dev); - } + return 0; +} + +static void dpm_async_resume_children(struct device *dev, async_func_t func) +{ /* - * Because async_schedule_dev_nocall() above has returned false or it - * has not been called at all, func() is not running and it is safe to - * update the async_in_progress flag without extra synchronization. + * Prevent racing with dpm_clear_async_state() during initial list + * walks in dpm_noirq_resume_devices(), dpm_resume_early(), and + * dpm_resume(). */ - dev->power.async_in_progress = false; - return false; + guard(mutex)(&dpm_list_mtx); + + /* + * Start processing "async" children of the device unless it's been + * started already for them. + */ + device_for_each_child(dev, func, dpm_async_with_cleanup); +} + +static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) +{ + struct device_link *link; + int idx; + + dpm_async_resume_children(dev, func); + + idx = device_links_read_lock(); + + /* Start processing the device's "async" consumers. */ + dev_for_each_link_to_consumer(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dpm_async_with_cleanup(link->consumer, func); + + device_links_read_unlock(idx); +} + +static void dpm_clear_async_state(struct device *dev) +{ + reinit_completion(&dev->power.completion); + dev->power.work_in_progress = false; +} + +static bool dpm_root_device(struct device *dev) +{ + lockdep_assert_held(&dpm_list_mtx); + + /* + * Since this function is required to run under dpm_list_mtx, the + * list_empty() below will only return true if the device's list of + * suppliers is actually empty before calling it. + */ + return !dev->parent && list_empty(&dev->links.suppliers); } +static void async_resume_noirq(void *data, async_cookie_t cookie); + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -628,8 +737,20 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy if (dev->power.syscore || dev->power.direct_complete) goto Out; - if (!dev->power.is_noirq_suspended) + if (!dev->power.is_noirq_suspended) { + /* + * This means that system suspend has been aborted in the noirq + * phase before invoking the noirq suspend callback for the + * device, so if device_suspend_late() has left it in suspend, + * device_resume_early() should leave it in suspend too, in case + * its early resume depends on the noirq resume that has not run.
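+ * + * (A hedged reading: dev_pm_skip_suspend() below means that the device's + * power.smart_suspend flag is set and it was left runtime-suspended, so its + * state has not changed during the aborted transition and power.must_resume + * can be cleared safely.)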
+ */ + if (dev_pm_skip_suspend(dev)) + dev->power.must_resume = false; + goto Out; + } if (!dpm_wait_for_superior(dev, async)) goto Out; @@ -642,12 +763,12 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set - * to avoid confusing drivers that don't use it. + * status to "active" unless its power.smart_suspend flag is clear, in + * which case it is not necessary to update its PM-runtime status. */ if (skip_resume) pm_runtime_set_suspended(dev); - else if (dev_pm_skip_suspend(dev)) + else if (dev_pm_smart_suspend(dev)) pm_runtime_set_active(dev); if (dev->pm_domain) { @@ -685,10 +806,12 @@ Out: TRACE_RESUME(error); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); } + + dpm_async_resume_subordinate(dev, async_resume_noirq); } static void async_resume_noirq(void *data, async_cookie_t cookie) @@ -712,17 +835,20 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); + list_for_each_entry(dev, &dpm_noirq_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_noirq); + } while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); list_move_tail(&dev->power.entry, &dpm_late_early_list); - if (!dev->power.async_in_progress) { + if (!dpm_async_fn(dev, async_resume_noirq)) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -737,7 +863,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "noirq"); - if (async_error) + if (READ_ONCE(async_error)) dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); @@ -758,6 +884,8 @@ void dpm_resume_noirq(pm_message_t state) device_wakeup_disarm_wake_irqs(); } +static void async_resume_early(void *data, async_cookie_t cookie); + /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. @@ -775,12 +903,15 @@ static void device_resume_early(struct device *dev, pm_message_t state, bool asy TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore || dev->power.direct_complete) + if (dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) goto Out; + if (dev->power.syscore) + goto Skip; + if (!dpm_wait_for_superior(dev, async)) goto Out; @@ -813,18 +944,20 @@ Run: Skip: dev->power.is_late_suspended = false; + pm_runtime_enable(dev); Out: TRACE_RESUME(error); - pm_runtime_enable(dev); complete_all(&dev->power.completion); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? 
" async early" : " early", error); } + + dpm_async_resume_subordinate(dev, async_resume_early); } static void async_resume_early(void *data, async_cookie_t cookie) @@ -852,17 +985,20 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); + list_for_each_entry(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_early); + } while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); list_move_tail(&dev->power.entry, &dpm_suspended_list); - if (!dev->power.async_in_progress) { + if (!dpm_async_fn(dev, async_resume_early)) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -877,7 +1013,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "early"); - if (async_error) + if (READ_ONCE(async_error)) dpm_save_failed_step(SUSPEND_RESUME_EARLY); trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); @@ -894,6 +1030,8 @@ void dpm_resume_start(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_resume_start); +static void async_resume(void *data, async_cookie_t cookie); + /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. @@ -913,8 +1051,20 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (!dev->power.is_suspended) + goto Complete; + + dev->power.is_suspended = false; + if (dev->power.direct_complete) { - /* Match the pm_runtime_disable() in __device_suspend(). */ + /* + * Allow new children to be added under the device after this + * point if it has no PM callbacks. + */ + if (dev->power.no_pm_callbacks) + dev->power.is_prepared = false; + + /* Match the pm_runtime_disable() in device_suspend(). */ pm_runtime_enable(dev); goto Complete; } @@ -931,9 +1081,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) */ dev->power.is_prepared = false; - if (!dev->power.is_suspended) - goto Unlock; - if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); @@ -971,9 +1118,7 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) End: error = dpm_run_callback(callback, dev, state, info); - dev->power.is_suspended = false; - Unlock: device_unlock(dev); dpm_watchdog_clear(&wd); @@ -983,10 +1128,12 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? 
" async" : "", error); } + + dpm_async_resume_subordinate(dev, async_resume); } static void async_resume(void *data, async_cookie_t cookie) @@ -1010,7 +1157,6 @@ void dpm_resume(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume"), state.event, true); - might_sleep(); pm_transition = state; async_error = 0; @@ -1018,17 +1164,20 @@ void dpm_resume(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); + list_for_each_entry(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume); + } while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); list_move_tail(&dev->power.entry, &dpm_prepared_list); - if (!dev->power.async_in_progress) { + if (!dpm_async_fn(dev, async_resume)) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -1043,7 +1192,7 @@ void dpm_resume(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, NULL); - if (async_error) + if (READ_ONCE(async_error)) dpm_save_failed_step(SUSPEND_RESUME); cpufreq_resume(); @@ -1093,6 +1242,8 @@ static void device_complete(struct device *dev, pm_message_t state) device_unlock(dev); out: + /* If enabling runtime PM for the device is blocked, unblock it. */ + pm_runtime_unblock(dev); pm_runtime_put(dev); } @@ -1108,7 +1259,6 @@ void dpm_complete(pm_message_t state) struct list_head list; trace_suspend_resume(TPS("dpm_complete"), state.event, true); - might_sleep(); INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); @@ -1147,6 +1297,7 @@ void dpm_complete(pm_message_t state) void dpm_resume_end(pm_message_t state) { dpm_resume(state); + pm_restore_gfp_mask(); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); @@ -1154,6 +1305,82 @@ EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ +static bool dpm_leaf_device(struct device *dev) +{ + struct device *child; + + lockdep_assert_held(&dpm_list_mtx); + + child = device_find_any_child(dev); + if (child) { + put_device(child); + + return false; + } + + /* + * Since this function is required to run under dpm_list_mtx, the + * list_empty() below will only return true if the device's list of + * consumers is actually empty before calling it. + */ + return list_empty(&dev->links.consumers); +} + +static bool dpm_async_suspend_parent(struct device *dev, async_func_t func) +{ + guard(mutex)(&dpm_list_mtx); + + /* + * If the device is suspended asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by checking + * if the device has been deleted already as the parent cannot be + * deleted before it. + */ + if (!device_pm_initialized(dev)) + return false; + + /* Start processing the device's parent if it is "async". 
*/ + if (dev->parent) + dpm_async_with_cleanup(dev->parent, func); + + return true; +} + +static void dpm_async_suspend_superior(struct device *dev, async_func_t func) +{ + struct device_link *link; + int idx; + + if (!dpm_async_suspend_parent(dev, func)) + return; + + idx = device_links_read_lock(); + + /* Start processing the device's "async" suppliers. */ + dev_for_each_link_to_supplier(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dpm_async_with_cleanup(link->supplier, func); + + device_links_read_unlock(idx); +} + +static void dpm_async_suspend_complete_all(struct list_head *device_list) +{ + struct device *dev; + + guard(mutex)(&async_wip_mtx); + + list_for_each_entry_reverse(dev, device_list, power.entry) { + /* + * In case the device is being waited for and async processing + * has not started for it yet, let the waiters make progress. + */ + if (!dev->power.work_in_progress) + complete_all(&dev->power.completion); + } +} + /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. @@ -1185,12 +1412,14 @@ static void dpm_superior_set_must_resume(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + dev_for_each_link_to_supplier(link, dev) link->supplier->power.must_resume = true; device_links_read_unlock(idx); } +static void async_suspend_noirq(void *data, async_cookie_t cookie); + /** * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. @@ -1200,7 +1429,7 @@ static void dpm_superior_set_must_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_suspend_noirq(struct device *dev, pm_message_t state, bool async) +static void device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1211,7 +1440,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy dpm_wait_for_subordinate(dev, async); - if (async_error) + if (READ_ONCE(async_error)) goto Complete; if (dev->power.syscore || dev->power.direct_complete) @@ -1244,7 +1473,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy Run: error = dpm_run_callback(callback, dev, state, info); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); goto Complete; @@ -1254,14 +1483,13 @@ Skip: dev->power.is_noirq_suspended = true; /* - * Skipping the resume of devices that were in use right before the - * system suspend (as indicated by their PM-runtime usage counters) - * would be suboptimal. Also resume them if doing that is not allowed - * to be skipped. + * Devices must be resumed unless they are explicitly allowed to be left + * in suspend, but even in that case skipping the resume of devices that + * were in use right before the system suspend (as indicated by their + * runtime PM usage counters and child counters) would be suboptimal. 
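+ * + * Put differently, the resume of the device may be skipped only if all of + * the following hold, matching the condition below: + * + * dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) - driver opt-in + * dev->power.may_skip_resume - allowed for this transition + * pm_runtime_need_not_resume(dev) - device was idle before suspend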
*/ - if (atomic_read(&dev->power.usage_count) > 1 || - !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && - dev->power.may_skip_resume)) + if (!(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && + dev->power.may_skip_resume) || !pm_runtime_need_not_resume(dev)) dev->power.must_resume = true; if (dev->power.must_resume) @@ -1270,7 +1498,11 @@ Skip: Complete: complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; + + if (error || READ_ONCE(async_error)) + return; + + dpm_async_suspend_superior(dev, async_suspend_noirq); } static void async_suspend_noirq(void *data, async_cookie_t cookie) @@ -1284,7 +1516,8 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie) static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); @@ -1293,8 +1526,18 @@ static int dpm_noirq_suspend_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_noirq); + } + while (!list_empty(&dpm_late_early_list)) { - struct device *dev = to_device(dpm_late_early_list.prev); + dev = to_device(dpm_late_early_list.prev); list_move(&dev->power.entry, &dpm_noirq_list); @@ -1305,22 +1548,28 @@ static int dpm_noirq_suspend_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - error = device_suspend_noirq(dev, state, false); + device_suspend_noirq(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_late_early_list); + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice_init(&dpm_late_early_list, &dpm_noirq_list); break; + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); - if (!error) - error = async_error; + error = READ_ONCE(async_error); if (error) dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); @@ -1365,6 +1614,8 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) spin_unlock_irq(&parent->power.lock); } +static void async_suspend_late(void *data, async_cookie_t cookie); + /** * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. @@ -1373,7 +1624,7 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_suspend_late(struct device *dev, pm_message_t state, bool async) +static void device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1382,21 +1633,28 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_SUSPEND(0); - __pm_runtime_disable(dev, false); - dpm_wait_for_subordinate(dev, async); - if (async_error) + if (READ_ONCE(async_error)) goto Complete; if (pm_wakeup_pending()) { - async_error = -EBUSY; + WRITE_ONCE(async_error, -EBUSY); goto Complete; } - if (dev->power.syscore || dev->power.direct_complete) + if (dev->power.direct_complete) goto Complete; + /* + * Disable runtime PM for the device without checking if there is a + * pending resume request for it. 
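The pm_wakeup_pending() checks in the suspend paths above turn a wakeup event signaled mid-transition into an -EBUSY abort. Illustrative only: a wakeup-capable driver typically reports such an event from its interrupt handler:

#include <linux/interrupt.h>
#include <linux/pm_wakeup.h>

static irqreturn_t foo_irq(int irq, void *data)
{
	struct device *dev = data;

	/* Record a wakeup event; an in-progress system suspend aborts. */
	pm_wakeup_event(dev, 0);

	return IRQ_HANDLED;
}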
+ */ + __pm_runtime_disable(dev, false); + + if (dev->power.syscore) + goto Skip; + if (dev->pm_domain) { info = "late power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); @@ -1424,9 +1682,10 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn Run: error = dpm_run_callback(callback, dev, state, info); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async late" : " late", error); + pm_runtime_enable(dev); goto Complete; } dpm_propagate_wakeup_to_parent(dev); @@ -1437,7 +1696,11 @@ Skip: Complete: TRACE_SUSPEND(error); complete_all(&dev->power.completion); - return error; + + if (error || READ_ONCE(async_error)) + return; + + dpm_async_suspend_superior(dev, async_suspend_late); } static void async_suspend_late(void *data, async_cookie_t cookie) @@ -1455,7 +1718,8 @@ static void async_suspend_late(void *data, async_cookie_t cookie) int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); @@ -1466,8 +1730,18 @@ int dpm_suspend_late(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_late); + } + while (!list_empty(&dpm_suspended_list)) { - struct device *dev = to_device(dpm_suspended_list.prev); + dev = to_device(dpm_suspended_list.prev); list_move(&dev->power.entry, &dpm_late_early_list); @@ -1478,22 +1752,28 @@ int dpm_suspend_late(pm_message_t state) mutex_unlock(&dpm_list_mtx); - error = device_suspend_late(dev, state, false); + device_suspend_late(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_suspended_list); + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice_init(&dpm_suspended_list, &dpm_late_early_list); break; + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); - if (!error) - error = async_error; + error = READ_ONCE(async_error); if (error) { dpm_save_failed_step(SUSPEND_SUSPEND_LATE); dpm_resume_early(resume_event(state)); @@ -1565,7 +1845,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + dev_for_each_link_to_supplier(link, dev) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); @@ -1574,13 +1854,15 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) device_links_read_unlock(idx); } +static void async_suspend(void *data, async_cookie_t cookie); + /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. 
*/ -static int device_suspend(struct device *dev, pm_message_t state, bool async) +static void device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1592,7 +1874,7 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_subordinate(dev, async); - if (async_error) { + if (READ_ONCE(async_error)) { dev->power.direct_complete = false; goto Complete; } @@ -1612,7 +1894,7 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) if (pm_wakeup_pending()) { dev->power.direct_complete = false; - async_error = -EBUSY; + WRITE_ONCE(async_error, -EBUSY); goto Complete; } @@ -1628,6 +1910,7 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); + dev->power.is_suspended = true; goto Complete; } @@ -1695,14 +1978,18 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) Complete: if (error) { - async_error = error; + WRITE_ONCE(async_error, error); dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; + + if (error || READ_ONCE(async_error)) + return; + + dpm_async_suspend_superior(dev, async_suspend); } static void async_suspend(void *data, async_cookie_t cookie) @@ -1720,7 +2007,8 @@ static void async_suspend(void *data, async_cookie_t cookie) int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); @@ -1733,8 +2021,18 @@ int dpm_suspend(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_prepared_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend); + } + while (!list_empty(&dpm_prepared_list)) { - struct device *dev = to_device(dpm_prepared_list.prev); + dev = to_device(dpm_prepared_list.prev); list_move(&dev->power.entry, &dpm_suspended_list); @@ -1745,22 +2043,28 @@ int dpm_suspend(pm_message_t state) mutex_unlock(&dpm_list_mtx); - error = device_suspend(dev, state, false); + device_suspend(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_prepared_list); + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice_init(&dpm_prepared_list, &dpm_suspended_list); break; + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); - if (!error) - error = async_error; + error = READ_ONCE(async_error); if (error) dpm_save_failed_step(SUSPEND_SUSPEND); @@ -1769,6 +2073,46 @@ int dpm_suspend(pm_message_t state) return error; } +static bool device_prepare_smart_suspend(struct device *dev) +{ + struct device_link *link; + bool ret = true; + int idx; + + /* + * The "smart suspend" feature is enabled for devices whose drivers ask + * for it and for devices without PM callbacks. + * + * However, if "smart suspend" is not enabled for the device's parent + * or any of its suppliers that take runtime PM into account, it cannot + * be enabled for the device either. 
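The suppliers that "take runtime PM into account" in the comment above are those linked with DL_FLAG_PM_RUNTIME. A minimal sketch of creating such a link (the consumer/supplier pairing is made up):

#include <linux/device.h>
#include <linux/errno.h>

static int foo_link_supplier(struct device *consumer, struct device *supplier)
{
	struct device_link *link;

	/* Couple the consumer's runtime PM state to the supplier. */
	link = device_link_add(consumer, supplier,
			       DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);

	return link ? 0 : -EINVAL;
}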
+ */ + if (!dev->power.no_pm_callbacks && + !dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + return false; + + if (dev->parent && !dev_pm_smart_suspend(dev->parent) && + !dev->parent->power.ignore_children && !pm_runtime_blocked(dev->parent)) + return false; + + idx = device_links_read_lock(); + + dev_for_each_link_to_supplier(link, dev) { + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) + continue; + + if (!dev_pm_smart_suspend(link->supplier) && + !pm_runtime_blocked(link->supplier)) { + ret = false; + break; + } + } + + device_links_read_unlock(idx); + + return ret; +} + /** * device_prepare - Prepare a device for system power transition. * @dev: Device to handle. @@ -1780,6 +2124,7 @@ int dpm_suspend(pm_message_t state) static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; + bool smart_suspend; int ret = 0; /* @@ -1789,6 +2134,13 @@ static int device_prepare(struct device *dev, pm_message_t state) * it again during the complete phase. */ pm_runtime_get_noresume(dev); + /* + * If runtime PM is disabled for the device at this point and it has + * never been enabled so far, it should not be enabled until this system + * suspend-resume cycle is complete, so prepare to trigger a warning on + * subsequent attempts to enable it. + */ + smart_suspend = !pm_runtime_block_if_disabled(dev); if (dev->power.syscore) return 0; @@ -1796,6 +2148,7 @@ static int device_prepare(struct device *dev, pm_message_t state) device_lock(dev); dev->power.wakeup_path = false; + dev->power.out_band_wakeup = false; if (dev->power.no_pm_callbacks) goto unlock; @@ -1823,6 +2176,13 @@ unlock: pm_runtime_put(dev); return ret; } + /* Do not enable "smart suspend" for devices with disabled runtime PM. */ + if (smart_suspend) + smart_suspend = device_prepare_smart_suspend(dev); + + spin_lock_irq(&dev->power.lock); + + dev->power.smart_suspend = smart_suspend; /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is @@ -1830,11 +2190,12 @@ unlock: * will do the same thing with all of its descendants". This only * applies to suspend transitions, however. */ - spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); + spin_unlock_irq(&dev->power.lock); + return 0; } @@ -1849,7 +2210,6 @@ int dpm_prepare(pm_message_t state) int error = 0; trace_suspend_resume(TPS("dpm_prepare"), state.event, true); - might_sleep(); /* * Give a chance for the known devices to complete their probes, before @@ -1916,8 +2276,10 @@ int dpm_suspend_start(pm_message_t state) error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); - else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } dpm_show_time(starttime, state, error, "start"); return error; @@ -1998,6 +2360,5 @@ void device_pm_check_callbacks(struct device *dev) bool dev_pm_skip_suspend(struct device *dev) { - return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && - pm_runtime_status_suspended(dev); + return dev_pm_smart_suspend(dev) && pm_runtime_status_suspended(dev); } diff --git a/drivers/base/power/runtime-test.c b/drivers/base/power/runtime-test.c new file mode 100644 index 000000000000..1535ad2b0264 --- /dev/null +++ b/drivers/base/power/runtime-test.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2025 Google, Inc. 
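With the change above, dev_pm_skip_suspend() keys off dev_pm_smart_suspend() instead of the raw driver flag. A hedged sketch of its usual call site in a late-suspend callback, modeled on existing subsystems ("foo" is hypothetical):

#include <linux/pm_runtime.h>

static int foo_suspend_late(struct device *dev)
{
	/* Leave a runtime-suspended "smart suspend" device where it is. */
	if (dev_pm_skip_suspend(dev))
		return 0;

	return pm_runtime_force_suspend(dev);
}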
+ */ + +#include <linux/cleanup.h> +#include <linux/pm_runtime.h> +#include <kunit/device.h> +#include <kunit/test.h> + +#define DEVICE_NAME "pm_runtime_test_device" + +static void pm_runtime_depth_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_get_sync(dev)); /* "already active" */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +/* Test pm_runtime_put() and friends when already suspended. */ +static void pm_runtime_already_suspended_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + pm_runtime_get_noresume(dev); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, 1, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_request_autosuspend(dev)); + + pm_runtime_get_noresume(dev); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev)); + + pm_runtime_get_noresume(dev); + pm_runtime_put_autosuspend(dev); + + /* Grab 2 refcounts */ + pm_runtime_get_noresume(dev); + pm_runtime_get_noresume(dev); + /* The first put() sees usage_count 1 */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync_autosuspend(dev)); + /* The second put() sees usage_count 0 but tells us "already suspended". */ + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev)); + + /* Should have remained suspended the whole time. */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +static void pm_runtime_idle_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + pm_runtime_put_noidle(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev)); +} + +static void pm_runtime_disabled_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* Never called pm_runtime_enable() */ + KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev)); + + /* "disabled" is treated as "active" */ + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_FALSE(test, pm_runtime_suspended(dev)); + + /* + * Note: these "fail", but they still acquire/release refcounts, so + * keep them balanced. 
+ */ + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev)); + pm_runtime_put(dev); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume_and_get(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_autosuspend(dev)); + + /* Still disabled */ + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev)); +} + +static void pm_runtime_error_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + /* Fake a .runtime_resume() error */ + dev->power.runtime_error = -EIO; + + /* + * Note: these "fail", but they still acquire/release refcounts, so + * keep them balanced. + */ + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + pm_runtime_put(dev); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync_autosuspend(dev)); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume_and_get(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_autosuspend(dev)); + + /* Error is still pending */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, -EIO, dev->power.runtime_error); + /* Clear error */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, dev->power.runtime_error); + /* Still suspended */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get(dev)); + pm_runtime_barrier(dev); + pm_runtime_put(dev); + pm_runtime_suspend(dev); /* flush the put(), to suspend */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_resume_and_get(dev)); + + /* + * The following should all return -EAGAIN (usage is non-zero) or 1 + * (already resumed). 
+ */ + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_autosuspend(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + + /* Suspended again */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +/* + * Explore a typical probe() sequence in which a device marks itself powered, + * but doesn't hold any runtime PM reference, so it suspends as soon as it goes + * idle. + */ +static void pm_runtime_probe_active_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_status_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_active(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + /* Nothing to flush. We stay active. */ + pm_runtime_barrier(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + /* Ask for idle? Now we suspend. */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +static struct kunit_case pm_runtime_test_cases[] = { + KUNIT_CASE(pm_runtime_depth_test), + KUNIT_CASE(pm_runtime_already_suspended_test), + KUNIT_CASE(pm_runtime_idle_test), + KUNIT_CASE(pm_runtime_disabled_test), + KUNIT_CASE(pm_runtime_error_test), + KUNIT_CASE(pm_runtime_probe_active_test), + {} +}; + +static struct kunit_suite pm_runtime_test_suite = { + .name = "pm_runtime_test_cases", + .test_cases = pm_runtime_test_cases, +}; + +kunit_test_suite(pm_runtime_test_suite); +MODULE_DESCRIPTION("Runtime power management unit test suite"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 2ee45841486b..84676cc24221 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -19,10 +19,24 @@ typedef int (*pm_callback_t)(struct device *); +static inline pm_callback_t get_callback_ptr(const void *start, size_t offset) +{ + return *(pm_callback_t *)(start + offset); +} + +static pm_callback_t __rpm_get_driver_callback(struct device *dev, + size_t cb_offset) +{ + if (dev->driver && dev->driver->pm) + return get_callback_ptr(dev->driver->pm, cb_offset); + + return NULL; +} + static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) { - pm_callback_t cb; const struct dev_pm_ops *ops; + pm_callback_t cb = NULL; if (dev->pm_domain) ops = &dev->pm_domain->ops; @@ -36,12 +50,10 @@ static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) ops = NULL; if (ops) - cb = *(pm_callback_t *)((void *)ops + cb_offset); - else - cb = NULL; + cb = get_callback_ptr(ops, cb_offset); - if (!cb && dev->driver && dev->driver->pm) - cb = *(pm_callback_t *)((void *)dev->driver->pm + cb_offset); + if (!cb) + cb = __rpm_get_driver_callback(dev, cb_offset); return cb; } @@ -78,7 +90,7 @@ static void update_pm_runtime_accounting(struct device *dev) /* * Because ktime_get_mono_fast_ns() is not monotonic during * timekeeping updates, ensure that 'now' is after the last saved - * timesptamp. + * timestamp. 
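The cb_offset scheme above selects a member of a callback structure by its byte offset, so a single helper serves every dev_pm_ops entry. A self-contained illustration of the technique (the struct and names are invented for the example):

#include <stddef.h>

struct ops {
	int (*start)(void *ctx);
	int (*stop)(void *ctx);
};

typedef int (*callback_t)(void *ctx);

static callback_t get_cb(void *ops, size_t off)
{
	/* Read the function pointer stored off bytes into the struct. */
	return *(callback_t *)((char *)ops + off);
}

/* get_cb(&foo_ops, offsetof(struct ops, stop)) yields foo_ops.stop. */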
*/ if (now < last) return; @@ -205,7 +217,7 @@ static int dev_memalloc_noio(struct device *dev, void *data) * resume/suspend callback of any one of its ancestors(or the * block device itself), the deadlock may be triggered inside the * memory allocation since it might not complete until the block - * device becomes active and the involed page I/O finishes. The + * device becomes active and the involved page I/O finishes. The * situation is pointed out first by Alan Stern. Network device * are involved in iSCSI kind of situation. * @@ -290,7 +302,7 @@ static int rpm_get_suppliers(struct device *dev) device_links_read_lock_held()) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME)) + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) continue; retval = pm_runtime_get_sync(link->supplier); @@ -448,8 +460,19 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) retval = __rpm_callback(cb, dev); } - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; + /* + * Since -EACCES means that runtime PM is disabled for the given device, + * it should not be returned by runtime PM callbacks. If it is returned + * nevertheless, assume it to be a transient error and convert it to + * -EAGAIN. + */ + if (retval == -EACCES) + retval = -EAGAIN; + + if (retval != -EAGAIN && retval != -EBUSY) + dev->power.runtime_error = retval; + + return retval; } /** @@ -475,6 +498,9 @@ static int rpm_idle(struct device *dev, int rpmflags) if (retval < 0) ; /* Conditions are wrong. */ + else if ((rpmflags & RPM_GET_PUT) && retval == 1) + ; /* put() is allowed in RPM_SUSPENDED */ + /* Idle notifications are allowed only in the RPM_ACTIVE state. */ else if (dev->power.runtime_status != RPM_ACTIVE) retval = -EAGAIN; @@ -725,21 +751,18 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + /* + * On transient errors, if the callback routine failed an autosuspend, + * and if the last_busy time has been updated so that there is a new + * autosuspend expiration time, automatically reschedule another + * autosuspend. + */ + if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + + pm_runtime_cancel_pending(dev); - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. - */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } goto out; } @@ -776,6 +799,8 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.runtime_status == RPM_ACTIVE && dev->power.last_status == RPM_ACTIVE) retval = 1; + else if (rpmflags & RPM_TRANSPARENT) + goto out; else retval = -EACCES; } @@ -1003,7 +1028,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) * If 'expires' is after the current time, we've been called * too early. */ - if (expires > 0 && expires < ktime_get_mono_fast_ns()) { + if (expires > 0 && expires <= ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); @@ -1183,10 +1208,12 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume); * * Return -EINVAL if runtime PM is disabled for @dev. 
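The automatic rescheduling above only helps if the driver keeps the last_busy timestamp fresh. A hedged sketch of the standard autosuspend pattern on the driver side (hypothetical "foo"; the 100 ms delay is arbitrary):

#include <linux/pm_runtime.h>

static int foo_probe(struct device *dev)
{
	pm_runtime_set_autosuspend_delay(dev, 100);	/* milliseconds */
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);

	return 0;
}

static void foo_io_done(struct device *dev)
{
	/* Restart the autosuspend timer before dropping the usage count. */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
}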
* - * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either - * @ign_usage_count is %true or the runtime PM usage counter of @dev is not - * zero, increment the usage counter of @dev and return 1. Otherwise, return 0 - * without changing the usage counter. + * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count + * is set, or (2) @dev is not ignoring children and its active child count is + * nonzero, or (3) the runtime PM usage counter of @dev is not zero, increment + * the usage counter of @dev and return 1. + * + * Otherwise, return 0 without changing the usage counter. * * If @ign_usage_count is %true, this function can be used to prevent suspending * the device when its runtime PM status is %RPM_ACTIVE. @@ -1208,7 +1235,8 @@ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count) retval = -EINVAL; } else if (dev->power.runtime_status != RPM_ACTIVE) { retval = 0; - } else if (ign_usage_count) { + } else if (ign_usage_count || (!dev->power.ignore_children && + atomic_read(&dev->power.child_count) > 0)) { retval = 1; atomic_inc(&dev->power.usage_count); } else { @@ -1241,10 +1269,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_active); * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is - * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case - * it returns 1. If the device is in a different state or its usage_count is 0, - * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device, - * in which case also the usage_count will remain unmodified. + * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not + * ignoring children and its active child count is nonzero. 1 is returned in + * this case. + * + * If @dev is in a different state or it is not in use (that is, its usage + * counter is 0, or it is ignoring children, or its active child count is 0), + * 0 is returned. + * + * -EINVAL is returned if runtime PM is disabled for the device, in which case + * also the usage counter of @dev is not updated. */ int pm_runtime_get_if_in_use(struct device *dev) { @@ -1433,47 +1467,48 @@ static void __pm_runtime_barrier(struct device *dev) * Next, make sure that all pending requests for the device have been flushed * from pm_wq and wait for all runtime PM operations involving the device in * progress to complete. - * - * Return value: - * 1, if there was a resume request pending and the device had to be woken up, - * 0, otherwise */ -int pm_runtime_barrier(struct device *dev) +void pm_runtime_barrier(struct device *dev) { - int retval = 0; - pm_runtime_get_noresume(dev); spin_lock_irq(&dev->power.lock); if (dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME) { + && dev->power.request == RPM_REQ_RESUME) rpm_resume(dev, 0); - retval = 1; - } __pm_runtime_barrier(dev); spin_unlock_irq(&dev->power.lock); pm_runtime_put_noidle(dev); - - return retval; } EXPORT_SYMBOL_GPL(pm_runtime_barrier); -/** - * __pm_runtime_disable - Disable runtime PM of a device. - * @dev: Device to handle. - * @check_resume: If set, check if there's a resume request for the device. - * - * Increment power.disable_depth for the device and if it was zero previously, - * cancel all pending runtime PM requests for the device and wait for all - * operations in progress to complete. The device can be either active or - * suspended after its runtime PM has been disabled. 
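For reference, a hedged example of the conditional get documented above, as used from a path that must never power the device up by itself:

#include <linux/pm_runtime.h>

static void foo_poll(struct device *dev)
{
	/* Touch the hardware only if it is already active and in use. */
	if (pm_runtime_get_if_in_use(dev) <= 0)
		return;

	/* ... access registers ... */

	pm_runtime_put(dev);
}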
- * - * If @check_resume is set and there's a resume request pending when - * __pm_runtime_disable() is called and power.disable_depth is zero, the - * function will wake up the device before disabling its runtime PM. - */ +bool pm_runtime_block_if_disabled(struct device *dev) +{ + bool ret; + + spin_lock_irq(&dev->power.lock); + + ret = !pm_runtime_enabled(dev); + if (ret && dev->power.last_status == RPM_INVALID) + dev->power.last_status = RPM_BLOCKED; + + spin_unlock_irq(&dev->power.lock); + + return ret; +} + +void pm_runtime_unblock(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.last_status == RPM_BLOCKED) + dev->power.last_status = RPM_INVALID; + + spin_unlock_irq(&dev->power.lock); +} + void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); @@ -1532,6 +1567,10 @@ void pm_runtime_enable(struct device *dev) if (--dev->power.disable_depth > 0) goto out; + if (dev->power.last_status == RPM_BLOCKED) { + dev_warn(dev, "Attempt to enable runtime PM when it is blocked\n"); + dump_stack(); + } dev->power.last_status = RPM_INVALID; dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); @@ -1545,6 +1584,32 @@ out: } EXPORT_SYMBOL_GPL(pm_runtime_enable); +static void pm_runtime_set_suspended_action(void *data) +{ + pm_runtime_set_suspended(data); +} + +/** + * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_set_active_enabled(struct device *dev) +{ + int err; + + err = pm_runtime_set_active(dev); + if (err) + return err; + + err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); + if (err) + return err; + + return devm_pm_runtime_enable(dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); + static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); @@ -1567,13 +1632,34 @@ int devm_pm_runtime_enable(struct device *dev) } EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); +static void pm_runtime_put_noidle_action(void *data) +{ + pm_runtime_put_noidle(data); +} + +/** + * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_get_noresume(struct device *dev) +{ + pm_runtime_get_noresume(dev); + + return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); + /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. * - * Increase the device's usage count and clear its power.runtime_auto flag, - * so that it cannot be suspended at run time until pm_runtime_allow() is called - * for it. + * Resume @dev if already suspended and block runtime suspend of @dev in such + * a way that it can be unblocked via the /sys/devices/.../power/control + * interface, or otherwise by calling pm_runtime_allow(). + * + * Calling this function many times in a row has the same effect as calling it + * once. */ void pm_runtime_forbid(struct device *dev) { @@ -1594,7 +1680,13 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid); * pm_runtime_allow - Unblock runtime PM of a device. * @dev: Device to handle. * - * Decrease the device's usage count and set its power.runtime_auto flag. + * Unblock runtime suspend of @dev after it has been blocked by + * pm_runtime_forbid() (for instance, if it has been blocked via the + * /sys/devices/.../power/control interface), check if @dev can be + * suspended and suspend it in that case. 
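Probe-time sketch of the new devres helper above (the driver is hypothetical); the device leaves probe powered up with runtime PM enabled, and both actions are undone automatically on driver detach:

#include <linux/pm_runtime.h>

static int foo_probe(struct device *dev)
{
	/*
	 * Set the device active and enable runtime PM; both are undone
	 * automatically when the driver is unbound.
	 */
	return devm_pm_runtime_set_active_enabled(dev);
}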
+ * + * Calling this function many times in a row has the same effect as calling it + * once. */ void pm_runtime_allow(struct device *dev) { @@ -1760,12 +1852,12 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; - dev->power.needs_force_resume = 0; + dev->power.needs_force_resume = false; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; - hrtimer_init(&dev->power.suspend_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - dev->power.suspend_timer.function = pm_suspend_timer_fn; + hrtimer_setup(&dev->power.suspend_timer, pm_suspend_timer_fn, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); init_waitqueue_head(&dev->power.wait_queue); } @@ -1787,6 +1879,11 @@ void pm_runtime_reinit(struct device *dev) pm_runtime_put(dev->parent); } } + /* + * Clear power.needs_force_resume in case it has been set by + * pm_runtime_force_suspend() invoked from a driver remove callback. + */ + dev->power.needs_force_resume = false; } /** @@ -1810,9 +1907,8 @@ void pm_runtime_get_suppliers(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - if (link->flags & DL_FLAG_PM_RUNTIME) { + dev_for_each_link_to_supplier(link, dev) + if (device_link_test(link, DL_FLAG_PM_RUNTIME)) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); } @@ -1866,7 +1962,7 @@ static void pm_runtime_drop_link_count(struct device *dev) */ void pm_runtime_drop_link(struct device_link *link) { - if (!(link->flags & DL_FLAG_PM_RUNTIME)) + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) return; pm_runtime_drop_link_count(link->consumer); @@ -1874,13 +1970,23 @@ void pm_runtime_drop_link(struct device_link *link) pm_request_idle(link->supplier); } -static bool pm_runtime_need_not_resume(struct device *dev) +static pm_callback_t get_callback(struct device *dev, size_t cb_offset) { - return atomic_read(&dev->power.usage_count) <= 1 && - (atomic_read(&dev->power.child_count) == 0 || - dev->power.ignore_children); + /* + * Setting power.strict_midlayer means that the middle layer + * code does not want its runtime PM callbacks to be invoked via + * pm_runtime_force_suspend() and pm_runtime_force_resume(), so + * return a direct pointer to the driver callback in that case. + */ + if (dev_pm_strict_midlayer_is_set(dev)) + return __rpm_get_driver_callback(dev, cb_offset); + + return __rpm_get_callback(dev, cb_offset); } +#define GET_CALLBACK(dev, callback) \ + get_callback(dev, offsetof(struct dev_pm_ops, callback)) + /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. @@ -1897,10 +2003,6 @@ static bool pm_runtime_need_not_resume(struct device *dev) * sure the device is put into low power state and it should only be used during * system-wide PM transitions to sleep states. It assumes that the analogous * pm_runtime_force_resume() will be used to resume the device. - * - * Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent - * state where this function has called the ->runtime_suspend callback but the - * PM core marks the driver as runtime active. 
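pm_runtime_force_suspend() and pm_runtime_force_resume() are normally installed as the system sleep callbacks, so the runtime PM path is reused for system-wide transitions. A minimal sketch with the stock helper macro (the foo_* callbacks are placeholders):

#include <linux/pm_runtime.h>

static int foo_runtime_suspend(struct device *dev)
{
	/* ... power the device down ... */
	return 0;
}

static int foo_runtime_resume(struct device *dev)
{
	/* ... power the device up ... */
	return 0;
}

/* Routes system sleep through pm_runtime_force_{suspend,resume}(). */
static DEFINE_RUNTIME_DEV_PM_OPS(foo_pm_ops, foo_runtime_suspend,
				 foo_runtime_resume, NULL);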
*/ int pm_runtime_force_suspend(struct device *dev) { @@ -1908,10 +2010,10 @@ int pm_runtime_force_suspend(struct device *dev) int ret; pm_runtime_disable(dev); - if (pm_runtime_status_suspended(dev)) + if (pm_runtime_status_suspended(dev) || dev->power.needs_force_resume) return 0; - callback = RPM_GET_CALLBACK(dev, runtime_suspend); + callback = GET_CALLBACK(dev, runtime_suspend); dev_pm_enable_wake_irq_check(dev, true); ret = callback ? callback(dev) : 0; @@ -1923,15 +2025,16 @@ int pm_runtime_force_suspend(struct device *dev) /* * If the device can stay in suspend after the system-wide transition * to the working state that will follow, drop the children counter of - * its parent, but set its status to RPM_SUSPENDED anyway in case this - * function will be called again for it in the meantime. + * its parent and the usage counters of its suppliers. Otherwise, set + * power.needs_force_resume to let pm_runtime_force_resume() know that + * the device needs to be taken care of and to prevent this function + * from handling the device again in case the device is passed to it + * once more subsequently. */ - if (pm_runtime_need_not_resume(dev)) { + if (pm_runtime_need_not_resume(dev)) pm_runtime_set_suspended(dev); - } else { - __update_runtime_status(dev, RPM_SUSPENDED); - dev->power.needs_force_resume = 1; - } + else + dev->power.needs_force_resume = true; return 0; @@ -1942,33 +2045,37 @@ err: } EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); +#ifdef CONFIG_PM_SLEEP + /** * pm_runtime_force_resume - Force a device into resume state if needed. * @dev: Device to resume. * - * Prior invoking this function we expect the user to have brought the device - * into low power state by a call to pm_runtime_force_suspend(). Here we reverse - * those actions and bring the device into full power, if it is expected to be - * used on system resume. In the other case, we defer the resume to be managed - * via runtime PM. + * This function expects that either pm_runtime_force_suspend() has put the + * device into a low-power state prior to calling it, or the device had been + * runtime-suspended before the preceding system-wide suspend transition and it + * was left in suspend during that transition. + * + * The actions carried out by pm_runtime_force_suspend(), or by a runtime + * suspend in general, are reversed and the device is brought back into full + * power if it is expected to be used on system resume, which is the case when + * its needs_force_resume flag is set or when its smart_suspend flag is set and + * its runtime PM status is "active". * - * Typically this function may be invoked from a system resume callback. + * In other cases, the resume is deferred to be managed via runtime PM. + * + * Typically, this function may be invoked from a system resume callback. */ int pm_runtime_force_resume(struct device *dev) { int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) + if (!dev->power.needs_force_resume && (!dev_pm_smart_suspend(dev) || + pm_runtime_status_suspended(dev))) goto out; - /* - * The value of the parent's children counter is correct already, so - * just update the status of the device. - */ - __update_runtime_status(dev, RPM_ACTIVE); - - callback = RPM_GET_CALLBACK(dev, runtime_resume); + callback = GET_CALLBACK(dev, runtime_resume); dev_pm_disable_wake_irq_check(dev, false); ret = callback ? 
callback(dev) : 0; @@ -1979,9 +2086,30 @@ int pm_runtime_force_resume(struct device *dev) } pm_runtime_mark_last_busy(dev); + out: - dev->power.needs_force_resume = 0; + /* + * The smart_suspend flag can be cleared here because it is not going + * to be necessary until the next system-wide suspend transition that + * will update it again. + */ + dev->power.smart_suspend = false; + /* + * Also clear needs_force_resume to make this function skip devices that + * have been seen by it once. + */ + dev->power.needs_force_resume = false; + pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_resume); + +bool pm_runtime_need_not_resume(struct device *dev) +{ + return atomic_read(&dev->power.usage_count) <= 1 && + (atomic_read(&dev->power.child_count) == 0 || + dev->power.ignore_children); +} + +#endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a1474fb67db9..13b31a3adc77 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h> -#include <linux/pm_wakeup.h> #include <linux/atomic.h> #include <linux/jiffies.h> #include "power.h" @@ -509,14 +508,6 @@ static ssize_t wakeup_last_time_ms_show(struct device *dev, return sysfs_emit(buf, "%lld\n", msec); } -static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, - kgid_t kgid) -{ - if (dev->power.wakeup && dev->power.wakeup->dev) - return device_change_owner(dev->power.wakeup->dev, kuid, kgid); - return 0; -} - static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP @@ -541,6 +532,15 @@ static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ + +static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, + kgid_t kgid) +{ + if (dev->power.wakeup && dev->power.wakeup->dev) + return device_change_owner(dev->power.wakeup->dev, kuid, kgid); + return 0; +} + #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) @@ -611,15 +611,9 @@ static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { -#ifdef CONFIG_PM_ADVANCED_DEBUG -#ifdef CONFIG_PM_SLEEP +#if defined(CONFIG_PM_ADVANCED_DEBUG) && defined(CONFIG_PM_SLEEP) &dev_attr_async.attr, #endif - &dev_attr_runtime_status.attr, - &dev_attr_runtime_usage.attr, - &dev_attr_runtime_active_kids.attr, - &dev_attr_runtime_enabled.attr, -#endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static const struct attribute_group pm_attr_group = { @@ -650,13 +644,16 @@ static const struct attribute_group pm_wakeup_attr_group = { }; static struct attribute *runtime_attrs[] = { -#ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, -#endif &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, +#ifdef CONFIG_PM_ADVANCED_DEBUG + &dev_attr_runtime_usage.attr, + &dev_attr_runtime_active_kids.attr, + &dev_attr_runtime_enabled.attr, +#endif NULL, }; static const struct attribute_group pm_runtime_attr_group = { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index cd6e559648b2..d8da7195bb00 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -238,10 +238,8 @@ int show_trace_dev_match(char *buf, size_t size) unsigned int hash = hash_string(DEVSEED, 
dev_name(dev), DEVHASH); if (hash == value) { - int len = snprintf(buf, size, "%s\n", + int len = scnprintf(buf, size, "%s\n", dev_driver_string(dev)); - if (len > size) - len = size; buf += len; ret += len; size -= len; diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 5a5a9e978e85..8aa28c08b289 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -103,6 +103,32 @@ void dev_pm_clear_wake_irq(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); +static void devm_pm_clear_wake_irq(void *dev) +{ + dev_pm_clear_wake_irq(dev); +} + +/** + * devm_pm_set_wake_irq - device-managed variant of dev_pm_set_wake_irq + * @dev: Device entry + * @irq: Device IO interrupt + * + * Attach a device IO interrupt as a wake IRQ, like dev_pm_set_wake_irq(), + * but the wake IRQ is automatically cleared on driver detach. + */ +int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + int ret; + + ret = dev_pm_set_wake_irq(dev, irq); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, devm_pm_clear_wake_irq, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_set_wake_irq); + /** * handle_threaded_wake_irq - Handler for dedicated wake-up interrupts * @irq: Device specific dedicated wake-up interrupt diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 752b417e8129..1e1a0e7eeac5 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -77,7 +77,7 @@ static DEFINE_IDA(wakeup_ida); * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. */ -struct wakeup_source *wakeup_source_create(const char *name) +static struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; const char *ws_name; @@ -106,7 +106,6 @@ err_name: err_ws: return NULL; } -EXPORT_SYMBOL_GPL(wakeup_source_create); /* * Record wakeup_source statistics being deleted into a dummy wakeup_source. */ @@ -149,7 +148,7 @@ static void wakeup_source_free(struct wakeup_source *ws) * * Use only for wakeup source objects created with wakeup_source_create(). */ -void wakeup_source_destroy(struct wakeup_source *ws) +static void wakeup_source_destroy(struct wakeup_source *ws) { if (!ws) return; @@ -158,13 +157,12 @@ static void wakeup_source_destroy(struct wakeup_source *ws) wakeup_source_record(ws); wakeup_source_free(ws); } -EXPORT_SYMBOL_GPL(wakeup_source_destroy); /** * wakeup_source_add - Add given object to the list of wakeup sources. * @ws: Wakeup source object to add to the list. */ -void wakeup_source_add(struct wakeup_source *ws) +static void wakeup_source_add(struct wakeup_source *ws) { unsigned long flags; @@ -179,32 +177,29 @@ void wakeup_source_add(struct wakeup_source *ws) list_add_rcu(&ws->entry, &wakeup_sources); raw_spin_unlock_irqrestore(&events_lock, flags); } -EXPORT_SYMBOL_GPL(wakeup_source_add); /** * wakeup_source_remove - Remove given object from the wakeup sources list. * @ws: Wakeup source object to remove from the list. */ -void wakeup_source_remove(struct wakeup_source *ws) +static void wakeup_source_remove(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; + /* + * After shutting down the timer, wakeup_source_activate() will warn if + * the given wakeup source is passed to it.
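Illustrative probe-path use of the device-managed wake IRQ helper added above (the device and its IRQ are hypothetical):

#include <linux/pm_wakeirq.h>
#include <linux/pm_wakeup.h>

static int foo_setup_wake(struct device *dev, int irq)
{
	int ret;

	ret = device_init_wakeup(dev, true);
	if (ret)
		return ret;

	/* The wake IRQ is cleared automatically on driver detach. */
	return devm_pm_set_wake_irq(dev, irq);
}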
+ */ + timer_shutdown_sync(&ws->timer); + raw_spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); - - del_timer_sync(&ws->timer); - /* - * Clear timer.function to make wakeup_source_not_registered() treat - * this wakeup source as not registered. - */ - ws->timer.function = NULL; } -EXPORT_SYMBOL_GPL(wakeup_source_remove); /** * wakeup_source_register - Create wakeup source and add it to the list. @@ -337,7 +332,7 @@ int device_wakeup_enable(struct device *dev) if (!dev || !dev->power.can_wakeup) return -EINVAL; - if (pm_suspend_target_state != PM_SUSPEND_ON) + if (pm_sleep_transition_in_progress()) dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); ws = wakeup_source_register(dev, dev_name(dev)); @@ -510,14 +505,14 @@ int device_set_wakeup_enable(struct device *dev, bool enable) EXPORT_SYMBOL_GPL(device_set_wakeup_enable); /** - * wakeup_source_not_registered - validate the given wakeup source. + * wakeup_source_not_usable - validate the given wakeup source. * @ws: Wakeup source to be validated. */ -static bool wakeup_source_not_registered(struct wakeup_source *ws) +static bool wakeup_source_not_usable(struct wakeup_source *ws) { /* - * Use timer struct to check if the given source is initialized - * by wakeup_source_add. + * Use the timer struct to check if the given wakeup source has been + * initialized by wakeup_source_add() and it is not going away. */ return ws->timer.function != pm_wakeup_timer_fn; } @@ -562,8 +557,7 @@ static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; - if (WARN_ONCE(wakeup_source_not_registered(ws), - "unregistered wakeup source\n")) + if (WARN_ONCE(wakeup_source_not_usable(ws), "unusable wakeup source\n")) return; ws->active = true; @@ -613,7 +607,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, false); - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; spin_unlock_irqrestore(&ws->lock, flags); @@ -693,7 +687,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) ws->max_time = duration; ws->last_time = now; - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; if (ws->autosleep_enabled) @@ -763,7 +757,7 @@ EXPORT_SYMBOL_GPL(pm_relax); */ static void pm_wakeup_timer_fn(struct timer_list *t) { - struct wakeup_source *ws = from_timer(ws, t, timer); + struct wakeup_source *ws = timer_container_of(ws, t, timer); unsigned long flags; spin_lock_irqsave(&ws->lock, flags); diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index 6732ed2869f9..3ffd427248e8 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -34,6 +34,7 @@ wakeup_attr(active_count); wakeup_attr(event_count); wakeup_attr(wakeup_count); wakeup_attr(expire_count); +wakeup_attr(relax_count); static ssize_t active_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -119,6 +120,7 @@ static struct attribute *wakeup_source_attrs[] = { &dev_attr_event_count.attr, &dev_attr_wakeup_count.attr, &dev_attr_expire_count.attr, + &dev_attr_relax_count.attr, &dev_attr_active_time_ms.attr, &dev_attr_total_time_ms.attr, &dev_attr_max_time_ms.attr, diff --git a/drivers/base/property.c b/drivers/base/property.c index 837d77e3af2b..6a63860579dd 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -71,6 +71,44 @@ bool 
fwnode_property_present(const struct fwnode_handle *fwnode, EXPORT_SYMBOL_GPL(fwnode_property_present); /** + * device_property_read_bool - Return the value for a boolean property of a device + * @dev: Device whose property is being checked + * @propname: Name of the property + * + * Return whether property @propname is true or false in the device firmware description. + * + * Return: true if property @propname is present. Otherwise, returns false. + */ +bool device_property_read_bool(const struct device *dev, const char *propname) +{ + return fwnode_property_read_bool(dev_fwnode(dev), propname); +} +EXPORT_SYMBOL_GPL(device_property_read_bool); + +/** + * fwnode_property_read_bool - Return the value for a boolean property of a firmware node + * @fwnode: Firmware node whose property to check + * @propname: Name of the property + * + * Return whether property @propname is true or false in the firmware description. + */ +bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, + const char *propname) +{ + bool ret; + + if (IS_ERR_OR_NULL(fwnode)) + return false; + + ret = fwnode_call_bool_op(fwnode, property_read_bool, propname); + if (ret) + return ret; + + return fwnode_call_bool_op(fwnode->secondary, property_read_bool, propname); +} +EXPORT_SYMBOL_GPL(fwnode_property_read_bool); + +/** * device_property_read_u8_array - return a u8 array property of a device * @dev: Device to get the property of * @propname: Name of the property @@ -540,7 +578,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. NULL if @nargs is known, - * otherwise @nargs is ignored. Only relevant on OF. + * otherwise @nargs is ignored. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. @@ -890,22 +928,49 @@ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(fwnode_device_is_available); /** - * device_get_child_node_count - return the number of child nodes for device - * @dev: Device to count the child nodes for + * fwnode_get_child_node_count - return the number of child nodes for a given firmware node + * @fwnode: Pointer to the parent firmware node * - * Return: the number of child nodes for a given device. + * Return: the number of child nodes for a given firmware node. + */ +unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode) +{ + struct fwnode_handle *child; + unsigned int count = 0; + + fwnode_for_each_child_node(fwnode, child) + count++; + + return count; +} +EXPORT_SYMBOL_GPL(fwnode_get_child_node_count); + +/** + * fwnode_get_named_child_node_count - number of child nodes with given name + * @fwnode: Node whose child nodes are counted. + * @name: String to match child node name against. + * + * Scan child nodes and count all the nodes with a specific name. A potential + * 'number' ending after the 'at sign' in the scanned names is ignored. + * E.g.:: + * fwnode_get_named_child_node_count(fwnode, "channel"); + * would match all the nodes:: + * channel { }, channel@0 {}, channel@0xabba {}... + * + * Return: the number of child nodes with a matching name for a given firmware node.
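Hedged usage sketch for the new counting helper, assuming a made-up binding with "channel" child nodes:

#include <linux/property.h>

static unsigned int foo_count_channels(struct device *dev)
{
	/* Counts channel {}, channel@0 {} and channel@0xabba {} alike. */
	return fwnode_get_named_child_node_count(dev_fwnode(dev), "channel");
}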
*/ -unsigned int device_get_child_node_count(const struct device *dev) +unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode, + const char *name) { struct fwnode_handle *child; unsigned int count = 0; - device_for_each_child_node(dev, child) + fwnode_for_each_named_child_node(fwnode, child, name) count++; return count; } -EXPORT_SYMBOL_GPL(device_get_child_node_count); +EXPORT_SYMBOL_GPL(fwnode_get_named_child_node_count); bool device_dma_supported(const struct device *dev) { diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index b1affac70d5d..ffb2ef488298 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -6,8 +6,6 @@ config REGMAP bool default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO || REGMAP_FSI) - select IRQ_DOMAIN if REGMAP_IRQ - select MDIO_BUS if REGMAP_MDIO help Enable support for the Register Map (regmap) access API. @@ -58,12 +56,14 @@ config REGMAP_W1 config REGMAP_MDIO tristate + select MDIO_BUS config REGMAP_MMIO tristate config REGMAP_IRQ bool + select IRQ_DOMAIN config REGMAP_RAM tristate diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index bdb450436cbc..1477329410ec 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -73,12 +73,12 @@ struct regmap { void *bus_context; const char *name; - bool async; spinlock_t async_lock; wait_queue_head_t async_waitq; struct list_head async_list; struct list_head async_free; int async_ret; + bool async; #ifdef CONFIG_DEBUG_FS bool debugfs_disable; @@ -117,8 +117,6 @@ struct regmap { void *val_buf, size_t val_size); int (*write)(void *context, const void *data, size_t count); - bool defer_caching; - unsigned long read_flag_mask; unsigned long write_flag_mask; @@ -127,6 +125,8 @@ struct regmap { int reg_stride; int reg_stride_order; + bool defer_caching; + /* If set, will always write field to HW. 
*/ bool force_write_field; @@ -161,6 +161,9 @@ struct regmap { struct reg_sequence *patch; int patch_regs; + /* if set, the regmap core can sleep */ + bool can_sleep; + /* if set, converts bulk read to single read */ bool use_single_read; /* if set, converts bulk write to single write */ @@ -176,9 +179,6 @@ struct regmap { void *selector_work_buf; /* Scratch buffer used for selector */ struct hwspinlock *hwlock; - - /* if set, the regmap core can sleep */ - bool can_sleep; }; struct regcache_ops { @@ -186,6 +186,7 @@ struct regcache_ops { enum regcache_type type; int (*init)(struct regmap *map); int (*exit)(struct regmap *map); + int (*populate)(struct regmap *map); #ifdef CONFIG_DEBUG_FS void (*debugfs_init)(struct regmap *map); #endif @@ -288,6 +289,7 @@ enum regmap_endian regmap_get_val_endian(struct device *dev, const struct regmap_bus *bus, const struct regmap_config *config); +extern struct regcache_ops regcache_flat_sparse_ops; extern struct regcache_ops regcache_rbtree_ops; extern struct regcache_ops regcache_maple_ops; extern struct regcache_ops regcache_flat_ops; diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c index f36d3618b67c..53cc59c84e2f 100644 --- a/drivers/base/regmap/regcache-flat.c +++ b/drivers/base/regmap/regcache-flat.c @@ -6,7 +6,11 @@ // // Author: Mark Brown <broonie@opensource.wolfsonmicro.com> +#include <linux/bitmap.h> +#include <linux/bitops.h> #include <linux/device.h> +#include <linux/limits.h> +#include <linux/overflow.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -18,46 +22,92 @@ static inline unsigned int regcache_flat_get_index(const struct regmap *map, return regcache_get_index_by_order(map, reg); } +struct regcache_flat_data { + unsigned long *valid; + unsigned int data[]; +}; + static int regcache_flat_init(struct regmap *map) { - int i; - unsigned int *cache; + unsigned int cache_size; + struct regcache_flat_data *cache; if (!map || map->reg_stride_order < 0 || !map->max_register_is_set) return -EINVAL; - map->cache = kcalloc(regcache_flat_get_index(map, map->max_register) - + 1, sizeof(unsigned int), map->alloc_flags); - if (!map->cache) + cache_size = regcache_flat_get_index(map, map->max_register) + 1; + cache = kzalloc(struct_size(cache, data, cache_size), map->alloc_flags); + if (!cache) return -ENOMEM; - cache = map->cache; + cache->valid = bitmap_zalloc(cache_size, map->alloc_flags); + if (!cache->valid) + goto err_free; + + map->cache = cache; + + return 0; + +err_free: + kfree(cache); + return -ENOMEM; +} + +static int regcache_flat_exit(struct regmap *map) +{ + struct regcache_flat_data *cache = map->cache; + + if (cache) + bitmap_free(cache->valid); + + kfree(cache); + map->cache = NULL; + + return 0; +} + +static int regcache_flat_populate(struct regmap *map) +{ + struct regcache_flat_data *cache = map->cache; + unsigned int i; for (i = 0; i < map->num_reg_defaults; i++) { unsigned int reg = map->reg_defaults[i].reg; unsigned int index = regcache_flat_get_index(map, reg); - cache[index] = map->reg_defaults[i].def; + cache->data[index] = map->reg_defaults[i].def; + __set_bit(index, cache->valid); } return 0; } -static int regcache_flat_exit(struct regmap *map) +static int regcache_flat_read(struct regmap *map, + unsigned int reg, unsigned int *value) { - kfree(map->cache); - map->cache = NULL; + struct regcache_flat_data *cache = map->cache; + unsigned int index = regcache_flat_get_index(map, reg); + + /* legacy behavior: ignore validity, but warn the user */ + if (unlikely(!test_bit(index, 
cache->valid))) + dev_warn_once(map->dev, + "using zero-initialized flat cache, this may cause unexpected behavior"); + + *value = cache->data[index]; return 0; } -static int regcache_flat_read(struct regmap *map, - unsigned int reg, unsigned int *value) +static int regcache_flat_sparse_read(struct regmap *map, + unsigned int reg, unsigned int *value) { - unsigned int *cache = map->cache; + struct regcache_flat_data *cache = map->cache; unsigned int index = regcache_flat_get_index(map, reg); - *value = cache[index]; + if (unlikely(!test_bit(index, cache->valid))) + return -ENOENT; + + *value = cache->data[index]; return 0; } @@ -65,10 +115,23 @@ static int regcache_flat_read(struct regmap *map, static int regcache_flat_write(struct regmap *map, unsigned int reg, unsigned int value) { - unsigned int *cache = map->cache; + struct regcache_flat_data *cache = map->cache; unsigned int index = regcache_flat_get_index(map, reg); - cache[index] = value; + cache->data[index] = value; + __set_bit(index, cache->valid); + + return 0; +} + +static int regcache_flat_drop(struct regmap *map, unsigned int min, + unsigned int max) +{ + struct regcache_flat_data *cache = map->cache; + unsigned int bitmap_min = regcache_flat_get_index(map, min); + unsigned int bitmap_max = regcache_flat_get_index(map, max); + + bitmap_clear(cache->valid, bitmap_min, bitmap_max + 1 - bitmap_min); return 0; } @@ -78,6 +141,18 @@ struct regcache_ops regcache_flat_ops = { .name = "flat", .init = regcache_flat_init, .exit = regcache_flat_exit, + .populate = regcache_flat_populate, .read = regcache_flat_read, .write = regcache_flat_write, }; + +struct regcache_ops regcache_flat_sparse_ops = { + .type = REGCACHE_FLAT_S, + .name = "flat-sparse", + .init = regcache_flat_init, + .exit = regcache_flat_exit, + .populate = regcache_flat_populate, + .read = regcache_flat_sparse_read, + .write = regcache_flat_write, + .drop = regcache_flat_drop, +}; diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index 23da7b31d715..ca1c72b68f31 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -73,8 +73,7 @@ static int regcache_maple_write(struct regmap *map, unsigned int reg, rcu_read_unlock(); - entry = kmalloc((last - index + 1) * sizeof(unsigned long), - map->alloc_flags); + entry = kmalloc_array(last - index + 1, sizeof(*entry), map->alloc_flags); if (!entry) return -ENOMEM; @@ -204,7 +203,7 @@ static int regcache_maple_sync_block(struct regmap *map, unsigned long *entry, * overheads. 
*/ if (max - min > 1 && regmap_can_raw_write(map)) { - buf = kmalloc(val_bytes * (max - min), map->alloc_flags); + buf = kmalloc_array(max - min, val_bytes, map->alloc_flags); if (!buf) { ret = -ENOMEM; goto out; @@ -290,6 +289,23 @@ out: return ret; } +static int regcache_maple_init(struct regmap *map) +{ + struct maple_tree *mt; + + mt = kmalloc(sizeof(*mt), map->alloc_flags); + if (!mt) + return -ENOMEM; + map->cache = mt; + + mt_init(mt); + + if (!mt_external_lock(mt) && map->lock_key) + lockdep_set_class_and_subclass(&mt->ma_lock, map->lock_key, 1); + + return 0; +} + static int regcache_maple_exit(struct regmap *map) { struct maple_tree *mt = map->cache; @@ -320,7 +336,7 @@ static int regcache_maple_insert_block(struct regmap *map, int first, unsigned long *entry; int i, ret; - entry = kcalloc(last - first + 1, sizeof(unsigned long), map->alloc_flags); + entry = kmalloc_array(last - first + 1, sizeof(*entry), map->alloc_flags); if (!entry) return -ENOMEM; @@ -341,26 +357,12 @@ static int regcache_maple_insert_block(struct regmap *map, int first, return ret; } -static int regcache_maple_init(struct regmap *map) +static int regcache_maple_populate(struct regmap *map) { - struct maple_tree *mt; int i; int ret; int range_start; - mt = kmalloc(sizeof(*mt), map->alloc_flags); - if (!mt) - return -ENOMEM; - map->cache = mt; - - mt_init(mt); - - if (!mt_external_lock(mt) && map->lock_key) - lockdep_set_class_and_subclass(&mt->ma_lock, map->lock_key, 1); - - if (!map->num_reg_defaults) - return 0; - range_start = 0; /* Scan for ranges of contiguous registers */ @@ -370,23 +372,14 @@ static int regcache_maple_init(struct regmap *map) ret = regcache_maple_insert_block(map, range_start, i - 1); if (ret != 0) - goto err; + return ret; range_start = i; } } /* Add the last block */ - ret = regcache_maple_insert_block(map, range_start, - map->num_reg_defaults - 1); - if (ret != 0) - goto err; - - return 0; - -err: - regcache_maple_exit(map); - return ret; + return regcache_maple_insert_block(map, range_start, map->num_reg_defaults - 1); } struct regcache_ops regcache_maple_ops = { @@ -394,6 +387,7 @@ struct regcache_ops regcache_maple_ops = { .name = "maple", .init = regcache_maple_init, .exit = regcache_maple_exit, + .populate = regcache_maple_populate, .read = regcache_maple_read, .write = regcache_maple_write, .drop = regcache_maple_drop, diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 188438186589..3344b82c3799 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -184,8 +184,6 @@ static void rbtree_debugfs_init(struct regmap *map) static int regcache_rbtree_init(struct regmap *map) { struct regcache_rbtree_ctx *rbtree_ctx; - int i; - int ret; map->cache = kmalloc(sizeof *rbtree_ctx, map->alloc_flags); if (!map->cache) @@ -195,19 +193,7 @@ static int regcache_rbtree_init(struct regmap *map) rbtree_ctx->root = RB_ROOT; rbtree_ctx->cached_rbnode = NULL; - for (i = 0; i < map->num_reg_defaults; i++) { - ret = regcache_rbtree_write(map, - map->reg_defaults[i].reg, - map->reg_defaults[i].def); - if (ret) - goto err; - } - return 0; - -err: - regcache_rbtree_exit(map); - return ret; } static int regcache_rbtree_exit(struct regmap *map) @@ -239,6 +225,22 @@ static int regcache_rbtree_exit(struct regmap *map) return 0; } +static int regcache_rbtree_populate(struct regmap *map) +{ + unsigned int i; + int ret; + + for (i = 0; i < map->num_reg_defaults; i++) { + ret = regcache_rbtree_write(map, + 
map->reg_defaults[i].reg, + map->reg_defaults[i].def); + if (ret) + return ret; + } + + return 0; +} + static int regcache_rbtree_read(struct regmap *map, unsigned int reg, unsigned int *value) { @@ -275,18 +277,16 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, pos = (reg - base_reg) / map->reg_stride; offset = (rbnode->base_reg - base_reg) / map->reg_stride; - blk = krealloc(rbnode->block, - blklen * map->cache_word_size, - map->alloc_flags); + blk = krealloc_array(rbnode->block, blklen, map->cache_word_size, map->alloc_flags); if (!blk) return -ENOMEM; rbnode->block = blk; if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { - present = krealloc(rbnode->cache_present, - BITS_TO_LONGS(blklen) * sizeof(*present), - map->alloc_flags); + present = krealloc_array(rbnode->cache_present, + BITS_TO_LONGS(blklen), sizeof(*present), + map->alloc_flags); if (!present) return -ENOMEM; @@ -548,6 +548,7 @@ struct regcache_ops regcache_rbtree_ops = { .name = "rbtree", .init = regcache_rbtree_init, .exit = regcache_rbtree_exit, + .populate = regcache_rbtree_populate, #ifdef CONFIG_DEBUG_FS .debugfs_init = rbtree_debugfs_init, #endif diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index d3659ba3cc11..319c342bf5a0 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -16,11 +16,32 @@ #include "internal.h" static const struct regcache_ops *cache_types[] = { + ®cache_flat_sparse_ops, ®cache_rbtree_ops, ®cache_maple_ops, ®cache_flat_ops, }; +static int regcache_defaults_cmp(const void *a, const void *b) +{ + const struct reg_default *x = a; + const struct reg_default *y = b; + + if (x->reg > y->reg) + return 1; + else if (x->reg < y->reg) + return -1; + else + return 0; +} + +void regcache_sort_defaults(struct reg_default *defaults, unsigned int ndefaults) +{ + sort(defaults, ndefaults, sizeof(*defaults), + regcache_defaults_cmp, NULL); +} +EXPORT_SYMBOL_GPL(regcache_sort_defaults); + static int regcache_hw_init(struct regmap *map) { int i, j; @@ -154,7 +175,7 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) map->num_reg_defaults = config->num_reg_defaults; map->num_reg_defaults_raw = config->num_reg_defaults_raw; map->reg_defaults_raw = config->reg_defaults_raw; - map->cache_word_size = DIV_ROUND_UP(config->val_bits, 8); + map->cache_word_size = BITS_TO_BYTES(config->val_bits); map->cache_size_raw = map->cache_word_size * config->num_reg_defaults_raw; map->cache = NULL; @@ -201,8 +222,24 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) if (ret) goto err_free; } + + if (map->num_reg_defaults && map->cache_ops->populate) { + dev_dbg(map->dev, "Populating %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); + ret = map->cache_ops->populate(map); + map->unlock(map->lock_arg); + if (ret) + goto err_exit; + } return 0; +err_exit: + if (map->cache_ops->exit) { + dev_dbg(map->dev, "Destroying %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); + ret = map->cache_ops->exit(map); + map->unlock(map->lock_arg); + } err_free: kfree(map->reg_defaults); if (map->cache_free) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index fb84cda92a75..c9b4c04b1cf6 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -470,10 +470,6 @@ static ssize_t regmap_cache_only_write_file(struct file *file, if (err) return count; - err = debugfs_file_get(file->f_path.dentry); - if (err) - return err; 
- map->lock(map->lock_arg); if (new_val && !map->cache_only) { @@ -486,7 +482,6 @@ static ssize_t regmap_cache_only_write_file(struct file *file, map->cache_only = new_val; map->unlock(map->lock_arg); - debugfs_file_put(file->f_path.dentry); if (require_sync) { err = regcache_sync(map); @@ -517,10 +512,6 @@ static ssize_t regmap_cache_bypass_write_file(struct file *file, if (err) return count; - err = debugfs_file_get(file->f_path.dentry); - if (err) - return err; - map->lock(map->lock_arg); if (new_val && !map->cache_bypass) { @@ -532,7 +523,6 @@ static ssize_t regmap_cache_bypass_write_file(struct file *file, map->cache_bypass = new_val; map->unlock(map->lock_arg); - debugfs_file_put(file->f_path.dentry); return count; } diff --git a/drivers/base/regmap/regmap-i3c.c b/drivers/base/regmap/regmap-i3c.c index b5300b7c477e..863b348704dc 100644 --- a/drivers/base/regmap/regmap-i3c.c +++ b/drivers/base/regmap/regmap-i3c.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. +#include <linux/array_size.h> #include <linux/regmap.h> #include <linux/i3c/device.h> #include <linux/i3c/master.h> @@ -10,7 +11,7 @@ static int regmap_i3c_write(void *context, const void *data, size_t count) { struct device *dev = context; struct i3c_device *i3c = dev_to_i3cdev(dev); - struct i3c_priv_xfer xfers[] = { + struct i3c_xfer xfers[] = { { .rnw = false, .len = count, @@ -18,7 +19,7 @@ static int regmap_i3c_write(void *context, const void *data, size_t count) }, }; - return i3c_device_do_priv_xfers(i3c, xfers, 1); + return i3c_device_do_xfers(i3c, xfers, ARRAY_SIZE(xfers), I3C_SDR); } static int regmap_i3c_read(void *context, @@ -27,7 +28,7 @@ static int regmap_i3c_read(void *context, { struct device *dev = context; struct i3c_device *i3c = dev_to_i3cdev(dev); - struct i3c_priv_xfer xfers[2]; + struct i3c_xfer xfers[2]; xfers[0].rnw = false; xfers[0].len = reg_size; @@ -37,7 +38,7 @@ static int regmap_i3c_read(void *context, xfers[1].len = val_size; xfers[1].data.in = val; - return i3c_device_do_priv_xfers(i3c, xfers, 2); + return i3c_device_do_xfers(i3c, xfers, ARRAY_SIZE(xfers), I3C_SDR); } static const struct regmap_bus regmap_i3c = { diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 0bcd81389a29..6112d942499b 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -6,11 +6,13 @@ // // Author: Mark Brown <broonie@opensource.wolfsonmicro.com> +#include <linux/array_size.h> #include <linux/device.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/overflow.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -19,6 +21,7 @@ struct regmap_irq_chip_data { struct mutex lock; + struct lock_class_key lock_key; struct irq_chip irq_chip; struct regmap *map; @@ -33,6 +36,7 @@ struct regmap_irq_chip_data { void *status_reg_buf; unsigned int *main_status_buf; unsigned int *status_buf; + unsigned int *prev_status_buf; unsigned int *mask_buf; unsigned int *mask_buf_def; unsigned int *wake_buf; @@ -193,10 +197,10 @@ static void regmap_irq_sync_unlock(struct irq_data *data) /* If we've changed our wakeup count propagate it to the parent */ if (d->wake_count < 0) for (i = d->wake_count; i < 0; i++) - irq_set_irq_wake(d->irq, 0); + disable_irq_wake(d->irq); else if (d->wake_count > 0) for (i = 0; i < d->wake_count; i++) - irq_set_irq_wake(d->irq, 1); + enable_irq_wake(d->irq); d->wake_count = 0; 
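For illustration, a minimal sketch of opting into the flat-sparse cache added in the regcache-flat.c hunks above; the device geometry and register numbers are hypothetical, and only the REGCACHE_FLAT_S enum value itself comes from this series:

#include <linux/regmap.h>

/* Hypothetical device: 8-bit registers, 16-bit values, no reg_defaults. */
static const struct regmap_config example_cfg = {
	.reg_bits	= 8,
	.val_bits	= 16,
	.max_register	= 0x7f,
	.cache_type	= REGCACHE_FLAT_S,	/* flat array plus validity bitmap */
};

static int example_read(struct regmap *map)
{
	unsigned int val;

	/*
	 * A register that has no default and was never written misses the
	 * sparse cache (-ENOENT from the cache op), so this read goes out
	 * to the bus; the legacy REGCACHE_FLAT would instead return a zero
	 * from the freshly allocated array and emit the dev_warn_once()
	 * shown above.
	 */
	return regmap_read(map, 0x10, &val);
}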
@@ -332,27 +336,13 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, return ret; } -static irqreturn_t regmap_irq_thread(int irq, void *d) +static int read_irq_data(struct regmap_irq_chip_data *data) { - struct regmap_irq_chip_data *data = d; const struct regmap_irq_chip *chip = data->chip; struct regmap *map = data->map; int ret, i; - bool handled = false; u32 reg; - if (chip->handle_pre_irq) - chip->handle_pre_irq(chip->irq_drv_data); - - if (chip->runtime_pm) { - ret = pm_runtime_get_sync(map->dev); - if (ret < 0) { - dev_err(map->dev, "IRQ thread failed to resume: %d\n", - ret); - goto exit; - } - } - /* * Read only registers with active IRQs if the chip has 'main status * register'. Else read in the statuses, using a single bulk read if @@ -379,10 +369,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) reg = data->get_irq_reg(data, chip->main_status, i); ret = regmap_read(map, reg, &data->main_status_buf[i]); if (ret) { - dev_err(map->dev, - "Failed to read IRQ status %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status %d\n", ret); + return ret; } } @@ -398,10 +386,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) ret = read_sub_irq_data(data, b); if (ret != 0) { - dev_err(map->dev, - "Failed to read IRQ status %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status %d\n", ret); + return ret; } } @@ -418,9 +404,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_reg_buf, chip->num_regs); if (ret != 0) { - dev_err(map->dev, "Failed to read IRQ status: %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); + return ret; } for (i = 0; i < data->chip->num_regs; i++) { @@ -436,7 +421,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) break; default: BUG(); - goto exit; + return -EIO; } } @@ -447,10 +432,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) ret = regmap_read(map, reg, &data->status_buf[i]); if (ret != 0) { - dev_err(map->dev, - "Failed to read IRQ status: %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); + return ret; } } } @@ -459,6 +442,42 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) for (i = 0; i < data->chip->num_regs; i++) data->status_buf[i] = ~data->status_buf[i]; + return 0; +} + +static irqreturn_t regmap_irq_thread(int irq, void *d) +{ + struct regmap_irq_chip_data *data = d; + const struct regmap_irq_chip *chip = data->chip; + struct regmap *map = data->map; + int ret, i; + bool handled = false; + u32 reg; + + if (chip->handle_pre_irq) + chip->handle_pre_irq(chip->irq_drv_data); + + if (chip->runtime_pm) { + ret = pm_runtime_get_sync(map->dev); + if (ret < 0) { + dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret); + goto exit; + } + } + + ret = read_irq_data(data); + if (ret < 0) + goto exit; + + if (chip->status_is_level) { + for (i = 0; i < data->chip->num_regs; i++) { + unsigned int val = data->status_buf[i]; + + data->status_buf[i] ^= data->prev_status_buf[i]; + data->prev_status_buf[i] = val; + } + } + /* * Ignore masked IRQs and ack if we need to; we ack early so * there is no race between handling and acknowledging the @@ -705,6 +724,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (!d->status_buf) goto err_alloc; + if (chip->status_is_level) { + d->prev_status_buf = kcalloc(chip->num_regs, sizeof(*d->prev_status_buf), + GFP_KERNEL); + if (!d->prev_status_buf) + goto err_alloc; + } + d->mask_buf = kcalloc(chip->num_regs, 
sizeof(*d->mask_buf), GFP_KERNEL); if (!d->mask_buf) @@ -776,7 +802,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - mutex_init(&d->lock); + /* + * If one regmap-irq is the parent of another then we'll try + * to lock the child with the parent locked, use an explicit + * lock_key so lockdep can figure out what's going on. + */ + lockdep_register_key(&d->lock_key); + mutex_init_with_key(&d->lock, &d->lock_key); for (i = 0; i < chip->num_irqs; i++) d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride] @@ -791,7 +823,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->mask_buf[i], chip->irq_drv_data); if (ret) - goto err_alloc; + goto err_mutex; } if (chip->mask_base && !chip->handle_mask_sync) { @@ -802,7 +834,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -813,7 +845,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -823,14 +855,14 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, /* Ack masked but set interrupts */ if (d->chip->no_status) { /* no status register so default to all active */ - d->status_buf[i] = GENMASK(31, 0); + d->status_buf[i] = UINT_MAX; } else { reg = d->get_irq_reg(d, d->chip->status_base, i); ret = regmap_read(map, reg, &d->status_buf[i]); if (ret != 0) { dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); - goto err_alloc; + goto err_mutex; } } @@ -854,7 +886,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -876,14 +908,24 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } + /* Store current levels */ + if (chip->status_is_level) { + ret = read_irq_data(d); + if (ret < 0) + goto err_mutex; + + memcpy(d->prev_status_buf, d->status_buf, + array_size(d->chip->num_regs, sizeof(d->prev_status_buf[0]))); + } + ret = regmap_irq_create_domain(fwnode, irq_base, chip, d); if (ret) - goto err_alloc; + goto err_mutex; ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags | IRQF_ONESHOT, @@ -900,13 +942,18 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, err_domain: /* Should really dispose of the domain but... 
*/ +err_mutex: + mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_buf); + kfree(d->prev_status_buf); kfree(d->status_reg_buf); if (d->config_buf) { for (i = 0; i < chip->num_config_bases; i++) @@ -981,13 +1028,17 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_reg_buf); kfree(d->status_buf); + kfree(d->prev_status_buf); if (d->config_buf) { for (i = 0; i < d->chip->num_config_bases; i++) kfree(d->config_buf[i]); kfree(d->config_buf); } + mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c index 64ea340950b6..f6fc5ed016da 100644 --- a/drivers/base/regmap/regmap-kunit.c +++ b/drivers/base/regmap/regmap-kunit.c @@ -54,6 +54,8 @@ static const char *regcache_type_name(enum regcache_type type) return "none"; case REGCACHE_FLAT: return "flat"; + case REGCACHE_FLAT_S: + return "flat-sparse"; case REGCACHE_RBTREE: return "rbtree"; case REGCACHE_MAPLE: @@ -93,6 +95,8 @@ static const struct regmap_test_param regcache_types_list[] = { { .cache = REGCACHE_NONE, .fast_io = true }, { .cache = REGCACHE_FLAT }, { .cache = REGCACHE_FLAT, .fast_io = true }, + { .cache = REGCACHE_FLAT_S }, + { .cache = REGCACHE_FLAT_S, .fast_io = true }, { .cache = REGCACHE_RBTREE }, { .cache = REGCACHE_RBTREE, .fast_io = true }, { .cache = REGCACHE_MAPLE }, @@ -104,6 +108,8 @@ KUNIT_ARRAY_PARAM(regcache_types, regcache_types_list, param_to_desc); static const struct regmap_test_param real_cache_types_only_list[] = { { .cache = REGCACHE_FLAT }, { .cache = REGCACHE_FLAT, .fast_io = true }, + { .cache = REGCACHE_FLAT_S }, + { .cache = REGCACHE_FLAT_S, .fast_io = true }, { .cache = REGCACHE_RBTREE }, { .cache = REGCACHE_RBTREE, .fast_io = true }, { .cache = REGCACHE_MAPLE }, @@ -119,6 +125,12 @@ static const struct regmap_test_param real_cache_types_list[] = { { .cache = REGCACHE_FLAT, .from_reg = 0x2002 }, { .cache = REGCACHE_FLAT, .from_reg = 0x2003 }, { .cache = REGCACHE_FLAT, .from_reg = 0x2004 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2001 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2002 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2003 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2004 }, { .cache = REGCACHE_RBTREE, .from_reg = 0 }, { .cache = REGCACHE_RBTREE, .from_reg = 0, .fast_io = true }, { .cache = REGCACHE_RBTREE, .from_reg = 0x2001 }, @@ -136,6 +148,12 @@ static const struct regmap_test_param real_cache_types_list[] = { KUNIT_ARRAY_PARAM(real_cache_types, real_cache_types_list, param_to_desc); static const struct regmap_test_param sparse_cache_types_list[] = { + { .cache = REGCACHE_FLAT_S, .from_reg = 0 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2001 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2002 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2003 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2004 }, { .cache = REGCACHE_RBTREE, .from_reg = 0 }, { .cache = REGCACHE_RBTREE, .from_reg = 0, .fast_io = true }, { .cache = REGCACHE_RBTREE, .from_reg = 0x2001 }, @@ -736,7 +754,7 
@@ static void stride(struct kunit *test) } } -static struct regmap_range_cfg test_range = { +static const struct regmap_range_cfg test_range = { .selector_reg = 1, .selector_mask = 0xff, @@ -1597,6 +1615,8 @@ static const struct regmap_test_param raw_types_list[] = { { .cache = REGCACHE_NONE, .val_endian = REGMAP_ENDIAN_BIG }, { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_LITTLE }, { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_BIG }, { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_LITTLE }, { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_BIG }, { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_LITTLE }, @@ -1608,6 +1628,8 @@ KUNIT_ARRAY_PARAM(raw_test_types, raw_types_list, param_to_desc); static const struct regmap_test_param raw_cache_types_list[] = { { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_LITTLE }, { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_BIG }, { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_LITTLE }, { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_BIG }, { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_LITTLE }, diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index 99d7fd85ca7d..29e5f3175301 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -609,4 +609,5 @@ void regmap_mmio_detach_clk(struct regmap *map) } EXPORT_SYMBOL_GPL(regmap_mmio_detach_clk); +MODULE_DESCRIPTION("regmap MMIO Module"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c index c99eada83780..6a61629f5f89 100644 --- a/drivers/base/regmap/regmap-sdw-mbq.c +++ b/drivers/base/regmap/regmap-sdw-mbq.c @@ -1,47 +1,189 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright(c) 2020 Intel Corporation. 
+#include <linux/bits.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/errno.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/regmap.h> #include <linux/soundwire/sdw.h> #include <linux/soundwire/sdw_registers.h> +#include <sound/sdca_function.h> #include "internal.h" +struct regmap_mbq_context { + struct device *dev; + struct sdw_slave *sdw; + + bool (*readable_reg)(struct device *dev, unsigned int reg); + + struct regmap_sdw_mbq_cfg cfg; + + int val_size; +}; + +static int regmap_sdw_mbq_size(struct regmap_mbq_context *ctx, unsigned int reg) +{ + int size = ctx->val_size; + + if (ctx->cfg.mbq_size) { + size = ctx->cfg.mbq_size(ctx->dev, reg); + if (!size || size > ctx->val_size) + return -EINVAL; + } + + return size; +} + +static bool regmap_sdw_mbq_deferrable(struct regmap_mbq_context *ctx, unsigned int reg) +{ + if (ctx->cfg.deferrable) + return ctx->cfg.deferrable(ctx->dev, reg); + + return false; +} + +static int regmap_sdw_mbq_poll_busy(struct sdw_slave *slave, unsigned int reg, + struct regmap_mbq_context *ctx) +{ + struct device *dev = ctx->dev; + int val, ret = 0; + + dev_dbg(dev, "Deferring transaction for 0x%x\n", reg); + + reg = SDW_SDCA_CTL(SDW_SDCA_CTL_FUNC(reg), 0, + SDCA_CTL_ENTITY_0_FUNCTION_STATUS, 0); + + if (ctx->readable_reg(dev, reg)) { + ret = read_poll_timeout(sdw_read_no_pm, val, + val < 0 || !(val & SDCA_CTL_ENTITY_0_FUNCTION_BUSY), + ctx->cfg.timeout_us, ctx->cfg.retry_us, + false, slave, reg); + if (val < 0) + return val; + if (ret) + dev_err(dev, "Function busy timed out 0x%x: %d\n", reg, val); + } else { + fsleep(ctx->cfg.timeout_us); + } + + return ret; +} + +static int regmap_sdw_mbq_write_impl(struct sdw_slave *slave, + unsigned int reg, unsigned int val, + int mbq_size, bool deferrable) +{ + int shift = mbq_size * BITS_PER_BYTE; + int ret; + + while (--mbq_size > 0) { + shift -= BITS_PER_BYTE; + + ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), + (val >> shift) & 0xff); + if (ret < 0) + return ret; + } + + ret = sdw_write_no_pm(slave, reg, val & 0xff); + if (deferrable && ret == -ENODATA) + return -EAGAIN; + + return ret; +} + static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int val) { - struct device *dev = context; - struct sdw_slave *slave = dev_to_sdw_dev(dev); + struct regmap_mbq_context *ctx = context; + struct sdw_slave *slave = ctx->sdw; + bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); + int mbq_size = regmap_sdw_mbq_size(ctx, reg); int ret; - ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff); - if (ret < 0) - return ret; + if (mbq_size < 0) + return mbq_size; + + /* + * Technically the spec does allow a device to set itself to busy for + * internal reasons, but since it doesn't provide any information on + * how to handle timeouts in that case, for now the code will only + * process a single wait/timeout on function busy and a single retry + * of the transaction. 
+ */ + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, deferrable); + if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); + if (ret) + return ret; + + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, false); + } - return sdw_write_no_pm(slave, reg, val & 0xff); + return ret; } -static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val) +static int regmap_sdw_mbq_read_impl(struct sdw_slave *slave, + unsigned int reg, unsigned int *val, + int mbq_size, bool deferrable) { - struct device *dev = context; - struct sdw_slave *slave = dev_to_sdw_dev(dev); - int read0; - int read1; + int shift = BITS_PER_BYTE; + int read; + + read = sdw_read_no_pm(slave, reg); + if (read < 0) { + if (deferrable && read == -ENODATA) + return -EAGAIN; + + return read; + } - read0 = sdw_read_no_pm(slave, reg); - if (read0 < 0) - return read0; + *val = read; - read1 = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg)); - if (read1 < 0) - return read1; + while (--mbq_size > 0) { + read = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg)); + if (read < 0) + return read; - *val = (read1 << 8) | read0; + *val |= read << shift; + shift += BITS_PER_BYTE; + } return 0; } +static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val) +{ + struct regmap_mbq_context *ctx = context; + struct sdw_slave *slave = ctx->sdw; + bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); + int mbq_size = regmap_sdw_mbq_size(ctx, reg); + int ret; + + if (mbq_size < 0) + return mbq_size; + + /* + * Technically the spec does allow a device to set itself to busy for + * internal reasons, but since it doesn't provide any information on + * how to handle timeouts in that case, for now the code will only + * process a single wait/timeout on function busy and a single retry + * of the transaction. 
+ */ + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, deferrable); + if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); + if (ret) + return ret; + + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, false); + } + + return ret; +} + static const struct regmap_bus regmap_sdw_mbq = { .reg_read = regmap_sdw_mbq_read, .reg_write = regmap_sdw_mbq_write, @@ -51,8 +193,7 @@ static const struct regmap_bus regmap_sdw_mbq = { static int regmap_sdw_mbq_config_check(const struct regmap_config *config) { - /* MBQ-based controls are only 16-bits for now */ - if (config->val_bits != 16) + if (config->val_bits > (sizeof(unsigned int) * BITS_PER_BYTE)) return -ENOTSUPP; /* Registers are 32 bits wide */ @@ -65,35 +206,71 @@ static int regmap_sdw_mbq_config_check(const struct regmap_config *config) return 0; } -struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, +static struct regmap_mbq_context * +regmap_sdw_mbq_gen_context(struct device *dev, + struct sdw_slave *sdw, + const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config) +{ + struct regmap_mbq_context *ctx; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->dev = dev; + ctx->sdw = sdw; + + if (mbq_config) + ctx->cfg = *mbq_config; + + ctx->val_size = config->val_bits / BITS_PER_BYTE; + ctx->readable_reg = config->readable_reg; + + return ctx; +} + +struct regmap *__regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name) { + struct regmap_mbq_context *ctx; int ret; ret = regmap_sdw_mbq_config_check(config); if (ret) return ERR_PTR(ret); - return __regmap_init(&sdw->dev, ®map_sdw_mbq, - &sdw->dev, config, lock_key, lock_name); + ctx = regmap_sdw_mbq_gen_context(dev, sdw, config, mbq_config); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + return __regmap_init(dev, ®map_sdw_mbq, ctx, + config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_sdw_mbq); -struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, +struct regmap *__devm_regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name) { + struct regmap_mbq_context *ctx; int ret; ret = regmap_sdw_mbq_config_check(config); if (ret) return ERR_PTR(ret); - return __devm_regmap_init(&sdw->dev, ®map_sdw_mbq, - &sdw->dev, config, lock_key, lock_name); + ctx = regmap_sdw_mbq_gen_context(dev, sdw, config, mbq_config); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + return __devm_regmap_init(dev, ®map_sdw_mbq, ctx, + config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw_mbq); diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c index 54eb7d227cf4..e523fae73004 100644 --- a/drivers/base/regmap/regmap-slimbus.c +++ b/drivers/base/regmap/regmap-slimbus.c @@ -48,8 +48,7 @@ struct regmap *__regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __regmap_init(&slimbus->dev, bus, &slimbus->dev, config, - lock_key, lock_name); + return __regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_slimbus); @@ -63,8 +62,7 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return 
__devm_regmap_init(&slimbus->dev, bus, &slimbus, config, - lock_key, lock_name); + return __devm_regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_slimbus); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 53131a7ede0a..ce9be3989a21 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -598,6 +598,17 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, } EXPORT_SYMBOL_GPL(regmap_attach_dev); +static int dev_get_regmap_match(struct device *dev, void *res, void *data); + +static int regmap_detach_dev(struct device *dev, struct regmap *map) +{ + if (!dev) + return 0; + + return devres_release(dev, dev_get_regmap_release, + dev_get_regmap_match, (void *)map->name); +} + static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus, const struct regmap_config *config) { @@ -758,14 +769,13 @@ struct regmap *__regmap_init(struct device *dev, map->alloc_flags = GFP_KERNEL; map->reg_base = config->reg_base; + map->reg_shift = config->pad_bits % 8; - map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8); map->format.pad_bytes = config->pad_bits / 8; map->format.reg_shift = config->reg_shift; - map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8); - map->format.buf_size = DIV_ROUND_UP(config->reg_bits + - config->val_bits + config->pad_bits, 8); - map->reg_shift = config->pad_bits % 8; + map->format.reg_bytes = BITS_TO_BYTES(config->reg_bits); + map->format.val_bytes = BITS_TO_BYTES(config->val_bits); + map->format.buf_size = BITS_TO_BYTES(config->reg_bits + config->val_bits + config->pad_bits); if (config->reg_stride) map->reg_stride = config->reg_stride; else @@ -817,7 +827,7 @@ struct regmap *__regmap_init(struct device *dev, map->read_flag_mask = bus->read_flag_mask; } - if (config && config->read && config->write) { + if (config->read && config->write) { map->reg_read = _regmap_bus_read; if (config->reg_update_bits) map->reg_update_bits = config->reg_update_bits; @@ -1052,13 +1062,13 @@ skip_format_initialization: /* Sanity check */ if (range_cfg->range_max < range_cfg->range_min) { - dev_err(map->dev, "Invalid range %d: %d < %d\n", i, + dev_err(map->dev, "Invalid range %d: %u < %u\n", i, range_cfg->range_max, range_cfg->range_min); goto err_range; } if (range_cfg->range_max > map->max_register) { - dev_err(map->dev, "Invalid range %d: %d > %d\n", i, + dev_err(map->dev, "Invalid range %d: %u > %u\n", i, range_cfg->range_max, map->max_register); goto err_range; } @@ -1163,6 +1173,8 @@ err_name: err_map: kfree(map); err: + if (bus && bus->free_on_exit) + kfree(bus); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(__regmap_init); @@ -1445,6 +1457,7 @@ void regmap_exit(struct regmap *map) { struct regmap_async *async; + regmap_detach_dev(map->dev, map); regcache_exit(map); regmap_debugfs_exit(map); @@ -2245,12 +2258,14 @@ EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); * @field: Register field to operate on * @bits: Bits to test * - * Returns -1 if the underlying regmap_field_read() fails, 0 if at least one of the - * tested bits is not set and 1 if all tested bits are set. + * Returns negative errno if the underlying regmap_field_read() fails, + * 0 if at least one of the tested bits is not set and 1 if all tested + * bits are set. 
*/ int regmap_field_test_bits(struct regmap_field *field, unsigned int bits) { - unsigned int val, ret; + unsigned int val; + int ret; ret = regmap_field_read(field, &val); if (ret) @@ -3104,7 +3119,7 @@ int regmap_fields_read(struct regmap_field *field, unsigned int id, EXPORT_SYMBOL_GPL(regmap_fields_read); static int _regmap_bulk_read(struct regmap *map, unsigned int reg, - unsigned int *regs, void *val, size_t val_count) + const unsigned int *regs, void *val, size_t val_count) { u32 *u32 = val; u16 *u16 = val; @@ -3198,7 +3213,7 @@ EXPORT_SYMBOL_GPL(regmap_bulk_read); * A value of zero will be returned on success, a negative errno will * be returned in error cases. */ -int regmap_multi_reg_read(struct regmap *map, unsigned int *regs, void *val, +int regmap_multi_reg_read(struct regmap *map, const unsigned int *regs, void *val, size_t val_count) { if (val_count == 0) @@ -3296,7 +3311,8 @@ EXPORT_SYMBOL_GPL(regmap_update_bits_base); */ int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits) { - unsigned int val, ret; + unsigned int val; + int ret; ret = regmap_read(map, reg, &val); if (ret) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index eb6eb25b343b..16a8301c25d6 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -529,20 +529,35 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, if (prop->is_inline) return -EINVAL; - if (index * sizeof(*ref) >= prop->length) + if ((index + 1) * sizeof(*ref) > prop->length) return -ENOENT; ref_array = prop->pointer; ref = &ref_array[index]; - refnode = software_node_fwnode(ref->node); + /* + * A software node can reference other software nodes or firmware + * nodes (which are the abstraction layer sitting on top of them). + * This is done to ensure we can create references to static software + * nodes before they're registered with the firmware node framework. + * At the time the reference is being resolved, we expect the swnodes + * in question to already have been registered and to be backed by + * a firmware node. This is why we use the fwnode API below to read the + * relevant properties and bump the reference count. 
+ */ + + if (ref->swnode) + refnode = software_node_fwnode(ref->swnode); + else if (ref->fwnode) + refnode = ref->fwnode; + else + return -EINVAL; + if (!refnode) return -ENOENT; if (nargs_prop) { - error = property_entry_read_int_array(ref->node->properties, - nargs_prop, sizeof(u32), - &nargs_prop_val, 1); + error = fwnode_property_read_u32(refnode, nargs_prop, &nargs_prop_val); if (error) return error; @@ -555,7 +570,7 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, if (!args) return 0; - args->fwnode = software_node_get(refnode); + args->fwnode = fwnode_handle_get(refnode); args->nargs = nargs; for (i = 0; i < nargs; i++) @@ -635,7 +650,10 @@ software_node_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) ref = prop->pointer; - return software_node_get(software_node_fwnode(ref[0].node)); + if (!ref->swnode) + return NULL; + + return software_node_get(software_node_fwnode(ref->swnode)); } static struct fwnode_handle * @@ -677,6 +695,7 @@ static const struct fwnode_operations software_node_ops = { .get = software_node_get, .put = software_node_put, .property_present = software_node_property_present, + .property_read_bool = software_node_property_present, .property_read_int_array = software_node_read_int_array, .property_read_string_array = software_node_read_string_array, .get_name = software_node_get_name, @@ -843,7 +862,7 @@ swnode_register(const struct software_node *node, struct swnode *parent, * of this function or by ordering the array such that parent comes before * child. */ -int software_node_register_node_group(const struct software_node **node_group) +int software_node_register_node_group(const struct software_node * const *node_group) { unsigned int i; int ret; @@ -876,8 +895,7 @@ EXPORT_SYMBOL_GPL(software_node_register_node_group); * remove the nodes individually, in the correct order (child before * parent). */ -void software_node_unregister_node_group( - const struct software_node **node_group) +void software_node_unregister_node_group(const struct software_node * const *node_group) { unsigned int i = 0; @@ -1079,6 +1097,7 @@ void software_node_notify(struct device *dev) if (!swnode) return; + kobject_get(&swnode->kobj); ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; @@ -1088,8 +1107,6 @@ void software_node_notify(struct device *dev) sysfs_remove_link(&dev->kobj, "software_node"); return; } - - kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index 13db1f78d2ce..483adb796654 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -11,32 +11,32 @@ #include <linux/suspend.h> #include <trace/events/power.h> -static LIST_HEAD(syscore_ops_list); -static DEFINE_MUTEX(syscore_ops_lock); +static LIST_HEAD(syscore_list); +static DEFINE_MUTEX(syscore_lock); /** - * register_syscore_ops - Register a set of system core operations. - * @ops: System core operations to register. + * register_syscore - Register a set of system core operations. + * @syscore: System core operations to register. 
*/ -void register_syscore_ops(struct syscore_ops *ops) +void register_syscore(struct syscore *syscore) { - mutex_lock(&syscore_ops_lock); - list_add_tail(&ops->node, &syscore_ops_list); - mutex_unlock(&syscore_ops_lock); + mutex_lock(&syscore_lock); + list_add_tail(&syscore->node, &syscore_list); + mutex_unlock(&syscore_lock); } -EXPORT_SYMBOL_GPL(register_syscore_ops); +EXPORT_SYMBOL_GPL(register_syscore); /** - * unregister_syscore_ops - Unregister a set of system core operations. - * @ops: System core operations to unregister. + * unregister_syscore - Unregister a set of system core operations. + * @syscore: System core operations to unregister. */ -void unregister_syscore_ops(struct syscore_ops *ops) +void unregister_syscore(struct syscore *syscore) { - mutex_lock(&syscore_ops_lock); - list_del(&ops->node); - mutex_unlock(&syscore_ops_lock); + mutex_lock(&syscore_lock); + list_del(&syscore->node); + mutex_unlock(&syscore_lock); } -EXPORT_SYMBOL_GPL(unregister_syscore_ops); +EXPORT_SYMBOL_GPL(unregister_syscore); #ifdef CONFIG_PM_SLEEP /** @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(unregister_syscore_ops); */ int syscore_suspend(void) { - struct syscore_ops *ops; + struct syscore *syscore; int ret = 0; trace_suspend_resume(TPS("syscore_suspend"), 0, true); @@ -59,25 +59,27 @@ int syscore_suspend(void) WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core suspend.\n"); - list_for_each_entry_reverse(ops, &syscore_ops_list, node) - if (ops->suspend) { - pm_pr_dbg("Calling %pS\n", ops->suspend); - ret = ops->suspend(); + list_for_each_entry_reverse(syscore, &syscore_list, node) + if (syscore->ops->suspend) { + pm_pr_dbg("Calling %pS\n", syscore->ops->suspend); + ret = syscore->ops->suspend(syscore->data); if (ret) goto err_out; WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pS\n", ops->suspend); + "Interrupts enabled after %pS\n", + syscore->ops->suspend); } trace_suspend_resume(TPS("syscore_suspend"), 0, false); return 0; err_out: - pr_err("PM: System core suspend callback %pS failed.\n", ops->suspend); + pr_err("PM: System core suspend callback %pS failed.\n", + syscore->ops->suspend); - list_for_each_entry_continue(ops, &syscore_ops_list, node) - if (ops->resume) - ops->resume(); + list_for_each_entry_continue(syscore, &syscore_list, node) + if (syscore->ops->resume) + syscore->ops->resume(syscore->data); return ret; } @@ -90,18 +92,19 @@ EXPORT_SYMBOL_GPL(syscore_suspend); */ void syscore_resume(void) { - struct syscore_ops *ops; + struct syscore *syscore; trace_suspend_resume(TPS("syscore_resume"), 0, true); WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core resume.\n"); - list_for_each_entry(ops, &syscore_ops_list, node) - if (ops->resume) { - pm_pr_dbg("Calling %pS\n", ops->resume); - ops->resume(); + list_for_each_entry(syscore, &syscore_list, node) + if (syscore->ops->resume) { + pm_pr_dbg("Calling %pS\n", syscore->ops->resume); + syscore->ops->resume(syscore->data); WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pS\n", ops->resume); + "Interrupts enabled after %pS\n", + syscore->ops->resume); } trace_suspend_resume(TPS("syscore_resume"), 0, false); } @@ -113,16 +116,17 @@ EXPORT_SYMBOL_GPL(syscore_resume); */ void syscore_shutdown(void) { - struct syscore_ops *ops; + struct syscore *syscore; - mutex_lock(&syscore_ops_lock); + mutex_lock(&syscore_lock); - list_for_each_entry_reverse(ops, &syscore_ops_list, node) - if (ops->shutdown) { + list_for_each_entry_reverse(syscore, &syscore_list, node) + if (syscore->ops->shutdown) { if 
(initcall_debug) - pr_info("PM: Calling %pS\n", ops->shutdown); - ops->shutdown(); + pr_info("PM: Calling %pS\n", + syscore->ops->shutdown); + syscore->ops->shutdown(syscore->data); } - mutex_unlock(&syscore_ops_lock); + mutex_unlock(&syscore_lock); } diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig index 5c7fac80611c..2756870615cc 100644 --- a/drivers/base/test/Kconfig +++ b/drivers/base/test/Kconfig @@ -12,6 +12,7 @@ config TEST_ASYNC_DRIVER_PROBE config DM_KUNIT_TEST tristate "KUnit Tests for the device model" if !KUNIT_ALL_TESTS depends on KUNIT + default KUNIT_ALL_TESTS config DRIVER_PE_KUNIT_TEST tristate "KUnit Tests for property entry API" if !KUNIT_ALL_TESTS diff --git a/drivers/base/test/platform-device-test.c b/drivers/base/test/platform-device-test.c index ea05b8785743..6355a2231b74 100644 --- a/drivers/base/test/platform-device-test.c +++ b/drivers/base/test/platform-device-test.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include <kunit/platform_device.h> #include <kunit/resource.h> #include <linux/device.h> +#include <linux/device/bus.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #define DEVICE_NAME "test" @@ -217,7 +220,43 @@ static struct kunit_suite platform_device_devm_test_suite = { .test_cases = platform_device_devm_tests, }; -kunit_test_suite(platform_device_devm_test_suite); +static void platform_device_find_by_null_test(struct kunit *test) +{ + struct platform_device *pdev; + int ret; + + pdev = kunit_platform_device_alloc(test, DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + ret = kunit_platform_device_add(test, pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_PTR_EQ(test, of_find_device_by_node(NULL), NULL); + + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_of_node(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_fwnode(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_acpi_dev(&platform_bus_type, NULL), NULL); + + KUNIT_EXPECT_FALSE(test, device_match_of_node(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_fwnode(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_dev(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_handle(&pdev->dev, NULL)); +} + +static struct kunit_case platform_device_match_tests[] = { + KUNIT_CASE(platform_device_find_by_null_test), + {} +}; + +static struct kunit_suite platform_device_match_test_suite = { + .name = "platform-device-match", + .test_cases = platform_device_match_tests, +}; + +kunit_test_suites( + &platform_device_devm_test_suite, + &platform_device_match_test_suite, +); MODULE_DESCRIPTION("Test module for platform devices"); MODULE_AUTHOR("Maxime Ripard <mripard@kernel.org>"); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 89f98be5c5b9..c890e2a5b428 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -23,23 +23,39 @@ static ssize_t name##_show(struct device *dev, \ #define define_siblings_read_func(name, mask) \ static ssize_t name##_read(struct file *file, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, \ + const struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ struct device *dev = kobj_to_dev(kobj); \ + cpumask_var_t mask; \ + ssize_t n; \ \ - return cpumap_print_bitmask_to_buf(buf, topology_##mask(dev->id), \ - off, count); \ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \ + return -ENOMEM; \ + \ + cpumask_copy(mask, topology_##mask(dev->id)); \ + 
n = cpumap_print_bitmask_to_buf(buf, mask, off, count); \ + free_cpumask_var(mask); \ + \ + return n; \ } \ \ static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, \ + const struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ struct device *dev = kobj_to_dev(kobj); \ + cpumask_var_t mask; \ + ssize_t n; \ + \ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \ + return -ENOMEM; \ + \ + cpumask_copy(mask, topology_##mask(dev->id)); \ + n = cpumap_print_list_to_buf(buf, mask, off, count); \ + free_cpumask_var(mask); \ \ - return cpumap_print_list_to_buf(buf, topology_##mask(dev->id), \ - off, count); \ + return n; \ } define_id_show_func(physical_package_id, "%d"); @@ -62,50 +78,50 @@ define_id_show_func(ppin, "0x%llx"); static DEVICE_ATTR_ADMIN_RO(ppin); define_siblings_read_func(thread_siblings, sibling_cpumask); -static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_cpus, sibling_cpumask); -static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_siblings, core_cpumask); -static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_CLUSTER_SYSFS define_siblings_read_func(cluster_cpus, cluster_cpumask); -static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DIE_SYSFS define_siblings_read_func(die_cpus, die_cpumask); -static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES); #endif define_siblings_read_func(package_cpus, core_cpumask); -static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_BOOK_SYSFS define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); -static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DRAWER_SYSFS define_id_show_func(drawer_id, "%d"); static DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); -static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES); -static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES); +static const BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES); +static 
const BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES); #endif -static struct bin_attribute *bin_attrs[] = { +static const struct bin_attribute *const bin_attrs[] = { &bin_attr_core_cpus, &bin_attr_core_cpus_list, &bin_attr_thread_siblings, @@ -192,3 +208,55 @@ static int __init topology_sysfs_init(void) } device_initcall(topology_sysfs_init); + +DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale); + +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + +static ssize_t cpu_capacity_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + + return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); +} + +static DEVICE_ATTR_RO(cpu_capacity); + +static int cpu_capacity_sysctl_add(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + return -ENOENT; + + device_create_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int cpu_capacity_sysctl_remove(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + return -ENOENT; + + device_remove_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int register_cpu_capacity_sysctl(void) +{ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity", + cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove); + + return 0; +} +subsys_initcall(register_cpu_capacity_sysctl); diff --git a/drivers/base/trace.h b/drivers/base/trace.h index e52b6eae060d..3b83b13a57ff 100644 --- a/drivers/base/trace.h +++ b/drivers/base/trace.h @@ -24,18 +24,18 @@ DECLARE_EVENT_CLASS(devres, __field(struct device *, dev) __field(const char *, op) __field(void *, node) - __field(const char *, name) + __string(name, name) __field(size_t, size) ), TP_fast_assign( __assign_str(devname); __entry->op = op; __entry->node = node; - __entry->name = name; + __assign_str(name); __entry->size = size; ), TP_printk("%s %3s %p %s (%zu bytes)", __get_str(devname), __entry->op, __entry->node, __get_str(name), __entry->size) ); DEFINE_EVENT(devres, devres_log,
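The regcache.c hunk earlier in this diff also exports regcache_sort_defaults(), so a driver whose reg_default table is generated out of order can sort it by register before registration, which the regcache bsearch()-based lookup relies on. A brief sketch with hypothetical table contents; the helper's signature is as shown in the diff:

#include <linux/array_size.h>
#include <linux/regmap.h>

/* Hypothetical defaults, e.g. collected in datasheet order. */
static struct reg_default example_defaults[] = {
	{ .reg = 0x20, .def = 0x1234 },
	{ .reg = 0x04, .def = 0x0001 },
	{ .reg = 0x10, .def = 0xffff },
};

static void example_prepare_config(struct regmap_config *cfg)
{
	/* Sort ascending by .reg before handing the table to regmap. */
	regcache_sort_defaults(example_defaults, ARRAY_SIZE(example_defaults));

	cfg->reg_defaults	= example_defaults;
	cfg->num_reg_defaults	= ARRAY_SIZE(example_defaults);
}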

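Stepping back to the regmap-sdw-mbq.c rewrite: MBQ values are no longer fixed at 16 bits, and the new struct regmap_sdw_mbq_cfg lets a driver size each register and opt into the function-busy retry individually. A sketch of a possible configuration; the member names and the __devm_regmap_init_sdw_mbq() entry point are taken from the diff, but the callback prototypes are inferred from its call sites and the register number is hypothetical:

#include <linux/device.h>
#include <linux/regmap.h>
#include <linux/types.h>

#define EXAMPLE_VOLUME_REG	0x40001234	/* hypothetical SDCA control */

/* Per-register MBQ transfer size in bytes (must not exceed val_bits / 8). */
static int example_mbq_size(struct device *dev, unsigned int reg)
{
	return reg == EXAMPLE_VOLUME_REG ? 2 : 1;
}

/* Registers for which an -ENODATA reply should trigger the busy retry. */
static bool example_deferrable(struct device *dev, unsigned int reg)
{
	return reg == EXAMPLE_VOLUME_REG;
}

static const struct regmap_sdw_mbq_cfg example_mbq_cfg = {
	.mbq_size	= example_mbq_size,
	.deferrable	= example_deferrable,
	/* Polling parameters consumed by the function-busy wait above. */
	.timeout_us	= 100,
	.retry_us	= 5000,
};

Drivers would normally reach this through the devm wrapper macro rather than the double-underscore entry point shown in the diff; the updated macro form lives in the regmap header, outside this diff.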
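Finally, regmap.c above constifies the register-list argument of regmap_multi_reg_read() (and the _regmap_bulk_read() helper behind it), so scatter-read lists can live in rodata. A small sketch, assuming a hypothetical device with .val_bits = 16, which is why the destination buffer is a u16 array:

#include <linux/array_size.h>
#include <linux/regmap.h>
#include <linux/types.h>

/* Now legal: the register list itself can be const data. */
static const unsigned int example_status_regs[] = { 0x10, 0x14, 0x23 };

static int example_read_status(struct regmap *map, u16 *vals)
{
	/* Reads each listed register into the corresponding vals[] slot. */
	return regmap_multi_reg_read(map, example_status_regs, vals,
				     ARRAY_SIZE(example_status_regs));
}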