diff options
Diffstat (limited to 'drivers/base')
-rw-r--r-- | drivers/base/Kconfig | 8 | ||||
-rw-r--r-- | drivers/base/Makefile | 1 | ||||
-rw-r--r-- | drivers/base/arch_numa.c | 484 | ||||
-rw-r--r-- | drivers/base/auxiliary.c | 13 | ||||
-rw-r--r-- | drivers/base/base.h | 5 | ||||
-rw-r--r-- | drivers/base/bus.c | 19 | ||||
-rw-r--r-- | drivers/base/core.c | 159 | ||||
-rw-r--r-- | drivers/base/init.c | 1 | ||||
-rw-r--r-- | drivers/base/memory.c | 35 | ||||
-rw-r--r-- | drivers/base/node.c | 33 | ||||
-rw-r--r-- | drivers/base/platform.c | 11 | ||||
-rw-r--r-- | drivers/base/power/domain.c | 2 | ||||
-rw-r--r-- | drivers/base/power/runtime.c | 62 | ||||
-rw-r--r-- | drivers/base/regmap/regmap-sdw-mbq.c | 10 | ||||
-rw-r--r-- | drivers/base/regmap/regmap-sdw.c | 4 | ||||
-rw-r--r-- | drivers/base/test/Makefile | 1 |
16 files changed, 732 insertions, 116 deletions
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 040be48ce046..ffcbe2bc460e 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -161,7 +161,7 @@ config HMEM_REPORTING default n depends on NUMA help - Enable reporting for heterogenous memory access attributes under + Enable reporting for heterogeneous memory access attributes under their non-uniform memory nodes. source "drivers/base/test/Kconfig" @@ -213,4 +213,10 @@ config GENERIC_ARCH_TOPOLOGY appropriate scaling, sysfs interface for reading capacity values at runtime. +config GENERIC_ARCH_NUMA + bool + help + Enable support for generic NUMA implementation. Currently, RISC-V + and ARM64 use it. + endmenu diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 5e7bf9669a81..8b93a7f291ec 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_PINCTRL) += pinctrl.o obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o +obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o obj-y += test/ diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c new file mode 100644 index 000000000000..4cc4e117727d --- /dev/null +++ b/drivers/base/arch_numa.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NUMA support, based on the x86 implementation. + * + * Copyright (C) 2015 Cavium Inc. + * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com> + */ + +#define pr_fmt(fmt) "NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/memblock.h> +#include <linux/module.h> +#include <linux/of.h> + +#include <asm/sections.h> + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); +nodemask_t numa_nodes_parsed __initdata; +static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; + +static int numa_distance_cnt; +static u8 *numa_distance; +bool numa_off; + +static __init int numa_parse_early_param(char *opt) +{ + if (!opt) + return -EINVAL; + if (str_has_prefix(opt, "off")) + numa_off = true; + + return 0; +} +early_param("numa", numa_parse_early_param); + +cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +EXPORT_SYMBOL(node_to_cpumask_map); + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const struct cpumask *cpumask_of_node(int node) +{ + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) + return cpu_none_mask; + + if (WARN_ON(node_to_cpumask_map[node] == NULL)) + return cpu_online_mask; + + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +#endif + +static void numa_update_cpu(unsigned int cpu, bool remove) +{ + int nid = cpu_to_node(cpu); + + if (nid == NUMA_NO_NODE) + return; + + if (remove) + cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]); + else + cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); +} + +void numa_add_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, false); +} + +void numa_remove_cpu(unsigned int cpu) +{ + numa_update_cpu(cpu, true); +} + +void numa_clear_node(unsigned int cpu) +{ + numa_remove_cpu(cpu); + set_cpu_numa_node(cpu, NUMA_NO_NODE); +} + +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: cpumask_of_node() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +static void __init setup_node_to_cpumask_map(void) +{ + int node; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) + setup_nr_node_ids(); + + /* allocate and clear the mapping */ + for (node = 0; node < nr_node_ids; node++) { + alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); + cpumask_clear(node_to_cpumask_map[node]); + } + + /* cpumask_of_node() will now work */ + pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); +} + +/* + * Set the cpu to node and mem mapping + */ +void numa_store_cpu_info(unsigned int cpu) +{ + set_cpu_numa_node(cpu, cpu_to_node_map[cpu]); +} + +void __init early_map_cpu_to_node(unsigned int cpu, int nid) +{ + /* fallback to node 0 */ + if (nid < 0 || nid >= MAX_NUMNODES || numa_off) + nid = 0; + + cpu_to_node_map[cpu] = nid; + + /* + * We should set the numa node of cpu0 as soon as possible, because it + * has already been set up online before. cpu_to_node(0) will soon be + * called. + */ + if (!cpu) + set_cpu_numa_node(cpu, nid); +} + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init early_cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); +} + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + int nid = early_cpu_to_node(cpu); + + return memblock_alloc_try_nid(size, align, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + +/** + * numa_add_memblk() - Set node id to memblk + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @end: End address of the new memblk + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + int ret; + + ret = memblock_set_node(start, (end - start), &memblock.memory, nid); + if (ret < 0) { + pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", + start, (end - 1), nid); + return ret; + } + + node_set(nid, numa_nodes_parsed); + return ret; +} + +/* + * Initialize NODE_DATA for a node on the local memory + */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) +{ + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (start_pfn >= end_pfn) + pr_info("Initmem setup node %d [<memory-less node>]\n", nid); + + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + if (!nd_pa) + panic("Cannot allocate %zu bytes for node %d data\n", + nd_size, nid); + + nd = __va(nd_pa); + + /* report and initialize */ + pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; +} + +/* + * numa_free_distance + * + * The current table is freed. + */ +void __init numa_free_distance(void) +{ + size_t size; + + if (!numa_distance) + return; + + size = numa_distance_cnt * numa_distance_cnt * + sizeof(numa_distance[0]); + + memblock_free(__pa(numa_distance), size); + numa_distance_cnt = 0; + numa_distance = NULL; +} + +/* + * Create a new NUMA distance table. + */ +static int __init numa_alloc_distance(void) +{ + size_t size; + u64 phys; + int i, j; + + size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (WARN_ON(!phys)) + return -ENOMEM; + + memblock_reserve(phys, size); + + numa_distance = __va(phys); + numa_distance_cnt = nr_node_ids; + + /* fill with the default distances */ + for (i = 0; i < numa_distance_cnt; i++) + for (j = 0; j < numa_distance_cnt; j++) + numa_distance[i * numa_distance_cnt + j] = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + + pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt); + + return 0; +} + +/** + * numa_set_distance() - Set inter node NUMA distance from node to node. + * @from: the 'from' node to set distance + * @to: the 'to' node to set distance + * @distance: NUMA distance + * + * Set the distance from node @from to @to to @distance. + * If distance table doesn't exist, a warning is printed. + * + * If @from or @to is higher than the highest known node or lower than zero + * or @distance doesn't make sense, the call is ignored. + */ +void __init numa_set_distance(int from, int to, int distance) +{ + if (!numa_distance) { + pr_warn_once("Warning: distance table not allocated yet\n"); + return; + } + + if (from >= numa_distance_cnt || to >= numa_distance_cnt || + from < 0 || to < 0) { + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +/* + * Return NUMA distance @from to @to + */ +int __node_distance(int from, int to) +{ + if (from >= numa_distance_cnt || to >= numa_distance_cnt) + return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; + return numa_distance[from * numa_distance_cnt + to]; +} +EXPORT_SYMBOL(__node_distance); + +static int __init numa_register_nodes(void) +{ + int nid; + struct memblock_region *mblk; + + /* Check that valid nid is set to memblks */ + for_each_mem_region(mblk) { + int mblk_nid = memblock_get_region_node(mblk); + phys_addr_t start = mblk->base; + phys_addr_t end = mblk->base + mblk->size - 1; + + if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { + pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n", + mblk_nid, &start, &end); + return -EINVAL; + } + } + + /* Finally register nodes. */ + for_each_node_mask(nid, numa_nodes_parsed) { + unsigned long start_pfn, end_pfn; + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + setup_node_data(nid, start_pfn, end_pfn); + node_set_online(nid); + } + + /* Setup online nodes to actual nodes*/ + node_possible_map = numa_nodes_parsed; + + return 0; +} + +static int __init numa_init(int (*init_func)(void)) +{ + int ret; + + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + + ret = numa_alloc_distance(); + if (ret < 0) + return ret; + + ret = init_func(); + if (ret < 0) + goto out_free_distance; + + if (nodes_empty(numa_nodes_parsed)) { + pr_info("No NUMA configuration found\n"); + ret = -EINVAL; + goto out_free_distance; + } + + ret = numa_register_nodes(); + if (ret < 0) + goto out_free_distance; + + setup_node_to_cpumask_map(); + + return 0; +out_free_distance: + numa_free_distance(); + return ret; +} + +/** + * dummy_numa_init() - Fallback dummy NUMA init + * + * Used if there's no underlying NUMA architecture, NUMA initialization + * fails, or NUMA is disabled on the command line. + * + * Must online at least one node (node 0) and add memory blocks that cover all + * allowed memory. It is unlikely that this function fails. + * + * Return: 0 on success, -errno on failure. + */ +static int __init dummy_numa_init(void) +{ + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM() - 1; + int ret; + + if (numa_off) + pr_info("NUMA disabled\n"); /* Forced off on command line. */ + pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end); + + ret = numa_add_memblk(0, start, end + 1); + if (ret) { + pr_err("NUMA init failed\n"); + return ret; + } + + numa_off = true; + return 0; +} + +#ifdef CONFIG_ACPI_NUMA +static int __init arch_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret) { + pr_info("Failed to initialise from firmware\n"); + return ret; + } + + return srat_disabled() ? -EINVAL : 0; +} +#else +static int __init arch_acpi_numa_init(void) +{ + return -EOPNOTSUPP; +} +#endif + +/** + * arch_numa_init() - Initialize NUMA + * + * Try each configured NUMA initialization method until one succeeds. The + * last fallback is dummy single node config encompassing whole memory. + */ +void __init arch_numa_init(void) +{ + if (!numa_off) { + if (!acpi_disabled && !numa_init(arch_acpi_numa_init)) + return; + if (acpi_disabled && !numa_init(of_numa_init)) + return; + } + + numa_init(dummy_numa_init); +} diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 8336535f1e11..d8b314e7d0fd 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -15,6 +15,7 @@ #include <linux/pm_runtime.h> #include <linux/string.h> #include <linux/auxiliary_bus.h> +#include "base.h" static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, const struct auxiliary_device *auxdev) @@ -260,19 +261,11 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv) } EXPORT_SYMBOL_GPL(auxiliary_driver_unregister); -static int __init auxiliary_bus_init(void) +void __init auxiliary_bus_init(void) { - return bus_register(&auxiliary_bus_type); + WARN_ON(bus_register(&auxiliary_bus_type)); } -static void __exit auxiliary_bus_exit(void) -{ - bus_unregister(&auxiliary_bus_type); -} - -module_init(auxiliary_bus_init); -module_exit(auxiliary_bus_exit); - MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Auxiliary Bus"); MODULE_AUTHOR("David Ertman <david.m.ertman@intel.com>"); diff --git a/drivers/base/base.h b/drivers/base/base.h index f5600a83124f..52b3d7b75c27 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -119,6 +119,11 @@ static inline int hypervisor_init(void) { return 0; } extern int platform_bus_init(void); extern void cpu_dev_init(void); extern void container_dev_init(void); +#ifdef CONFIG_AUXILIARY_BUS +extern void auxiliary_bus_init(void); +#else +static inline void auxiliary_bus_init(void) { } +#endif struct kobject *virtual_device_parent(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index a9c23ecebc7c..36d0c654ea61 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -633,7 +633,7 @@ int bus_add_driver(struct device_driver *drv) error = driver_add_groups(drv, bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? Give up */ - printk(KERN_ERR "%s: driver_create_groups(%s) failed\n", + printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", __func__, drv->name); } @@ -729,23 +729,6 @@ int device_reprobe(struct device *dev) } EXPORT_SYMBOL_GPL(device_reprobe); -/** - * find_bus - locate bus by name. - * @name: name of bus. - * - * Call kset_find_obj() to iterate over list of buses to - * find a bus by name. Return bus if found. - * - * Note that kset_find_obj increments bus' reference count. - */ -#if 0 -struct bus_type *find_bus(char *name) -{ - struct kobject *k = kset_find_obj(bus_kset, name); - return k ? to_bus(k) : NULL; -} -#endif /* 0 */ - static int bus_add_groups(struct bus_type *bus, const struct attribute_group **groups) { diff --git a/drivers/base/core.c b/drivers/base/core.c index 7c0406e675e9..f29839382f81 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -149,6 +149,21 @@ void fwnode_links_purge(struct fwnode_handle *fwnode) fwnode_links_purge_consumers(fwnode); } +static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *child; + + /* Don't purge consumer links of an added child */ + if (fwnode->dev) + return; + + fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_links_purge_consumers(fwnode); + + fwnode_for_each_available_child_node(fwnode, child) + fw_devlink_purge_absent_suppliers(child); +} + #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); @@ -245,7 +260,8 @@ int device_is_dependent(struct device *dev, void *target) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { - if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) + if ((link->flags & ~DL_FLAG_INFERRED) == + (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; if (link->consumer == target) @@ -318,7 +334,8 @@ static int device_reorder_to_tail(struct device *dev, void *not_used) device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) + if ((link->flags & ~DL_FLAG_INFERRED) == + (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) continue; device_reorder_to_tail(link->consumer, NULL); } @@ -566,7 +583,8 @@ postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ - DL_FLAG_SYNC_STATE_ONLY) + DL_FLAG_SYNC_STATE_ONLY | \ + DL_FLAG_INFERRED) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) @@ -635,7 +653,7 @@ struct device_link *device_link_add(struct device *consumer, if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_SYNC_STATE_ONLY && - flags != DL_FLAG_SYNC_STATE_ONLY) || + (flags & ~DL_FLAG_INFERRED) != DL_FLAG_SYNC_STATE_ONLY) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) @@ -691,6 +709,10 @@ struct device_link *device_link_add(struct device *consumer, if (link->consumer != consumer) continue; + if (link->flags & DL_FLAG_INFERRED && + !(flags & DL_FLAG_INFERRED)) + link->flags &= ~DL_FLAG_INFERRED; + if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); @@ -950,6 +972,10 @@ int device_links_check_suppliers(struct device *dev) mutex_lock(&fwnode_link_lock); if (dev->fwnode && !list_empty(&dev->fwnode->suppliers) && !fw_devlink_is_permissive()) { + dev_dbg(dev, "probe deferral - wait for supplier %pfwP\n", + list_first_entry(&dev->fwnode->suppliers, + struct fwnode_link, + c_hook)->supplier); mutex_unlock(&fwnode_link_lock); return -EPROBE_DEFER; } @@ -964,6 +990,8 @@ int device_links_check_suppliers(struct device *dev) if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { device_links_missing_supplier(dev); + dev_dbg(dev, "probe deferral - supplier %s not ready\n", + dev_name(link->supplier)); ret = -EPROBE_DEFER; break; } @@ -1142,12 +1170,22 @@ void device_links_driver_bound(struct device *dev) LIST_HEAD(sync_list); /* - * If a device probes successfully, it's expected to have created all + * If a device binds successfully, it's expected to have created all * the device links it needs to or make new device links as it needs - * them. So, it no longer needs to wait on any suppliers. + * them. So, fw_devlink no longer needs to create device links to any + * of the device's suppliers. + * + * Also, if a child firmware node of this bound device is not added as + * a device by now, assume it is never going to be added and make sure + * other devices don't defer probe indefinitely by waiting for such a + * child device. */ - if (dev->fwnode && dev->fwnode->dev == dev) + if (dev->fwnode && dev->fwnode->dev == dev) { + struct fwnode_handle *child; fwnode_links_purge_suppliers(dev->fwnode); + fwnode_for_each_available_child_node(dev->fwnode, child) + fw_devlink_purge_absent_suppliers(child); + } device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); @@ -1458,7 +1496,14 @@ static void device_links_purge(struct device *dev) device_links_write_unlock(); } -static u32 fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; +#define FW_DEVLINK_FLAGS_PERMISSIVE (DL_FLAG_INFERRED | \ + DL_FLAG_SYNC_STATE_ONLY) +#define FW_DEVLINK_FLAGS_ON (DL_FLAG_INFERRED | \ + DL_FLAG_AUTOPROBE_CONSUMER) +#define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ + DL_FLAG_PM_RUNTIME) + +static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE; static int __init fw_devlink_setup(char *arg) { if (!arg) @@ -1467,17 +1512,23 @@ static int __init fw_devlink_setup(char *arg) if (strcmp(arg, "off") == 0) { fw_devlink_flags = 0; } else if (strcmp(arg, "permissive") == 0) { - fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY; + fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE; } else if (strcmp(arg, "on") == 0) { - fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER; + fw_devlink_flags = FW_DEVLINK_FLAGS_ON; } else if (strcmp(arg, "rpm") == 0) { - fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER | - DL_FLAG_PM_RUNTIME; + fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; } return 0; } early_param("fw_devlink", fw_devlink_setup); +static bool fw_devlink_strict; +static int __init fw_devlink_strict_setup(char *arg) +{ + return strtobool(arg, &fw_devlink_strict); +} +early_param("fw_devlink.strict", fw_devlink_strict_setup); + u32 fw_devlink_get_flags(void) { return fw_devlink_flags; @@ -1485,7 +1536,12 @@ u32 fw_devlink_get_flags(void) static bool fw_devlink_is_permissive(void) { - return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY; + return fw_devlink_flags == FW_DEVLINK_FLAGS_PERMISSIVE; +} + +bool fw_devlink_is_strict(void) +{ + return fw_devlink_strict && !fw_devlink_is_permissive(); } static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode) @@ -1508,6 +1564,53 @@ static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) } /** + * fw_devlink_relax_cycle - Convert cyclic links to SYNC_STATE_ONLY links + * @con: Device to check dependencies for. + * @sup: Device to check against. + * + * Check if @sup depends on @con or any device dependent on it (its child or + * its consumer etc). When such a cyclic dependency is found, convert all + * device links created solely by fw_devlink into SYNC_STATE_ONLY device links. + * This is the equivalent of doing fw_devlink=permissive just between the + * devices in the cycle. We need to do this because, at this point, fw_devlink + * can't tell which of these dependencies is not a real dependency. + * + * Return 1 if a cycle is found. Otherwise, return 0. + */ +static int fw_devlink_relax_cycle(struct device *con, void *sup) +{ + struct device_link *link; + int ret; + + if (con == sup) + return 1; + + ret = device_for_each_child(con, sup, fw_devlink_relax_cycle); + if (ret) + return ret; + + list_for_each_entry(link, &con->links.consumers, s_node) { + if ((link->flags & ~DL_FLAG_INFERRED) == + (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) + continue; + + if (!fw_devlink_relax_cycle(link->consumer, sup)) + continue; + + ret = 1; + + if (!(link->flags & DL_FLAG_INFERRED)) + continue; + + pm_runtime_drop_link(link); + link->flags = DL_FLAG_MANAGED | FW_DEVLINK_FLAGS_PERMISSIVE; + dev_dbg(link->consumer, "Relaxing link with %s\n", + dev_name(link->supplier)); + } + return ret; +} + +/** * fw_devlink_create_devlink - Create a device link from a consumer to fwnode * @con - Consumer device for the device link * @sup_handle - fwnode handle of supplier @@ -1535,15 +1638,39 @@ static int fw_devlink_create_devlink(struct device *con, sup_dev = get_dev_from_fwnode(sup_handle); if (sup_dev) { /* + * If it's one of those drivers that don't actually bind to + * their device using driver core, then don't wait on this + * supplier device indefinitely. + */ + if (sup_dev->links.status == DL_DEV_NO_DRIVER && + sup_handle->flags & FWNODE_FLAG_INITIALIZED) { + ret = -EINVAL; + goto out; + } + + /* * If this fails, it is due to cycles in device links. Just * give up on this link and treat it as invalid. */ - if (!device_link_add(con, sup_dev, flags)) + if (!device_link_add(con, sup_dev, flags) && + !(flags & DL_FLAG_SYNC_STATE_ONLY)) { + dev_info(con, "Fixing up cyclic dependency with %s\n", + dev_name(sup_dev)); + device_links_write_lock(); + fw_devlink_relax_cycle(con, sup_dev); + device_links_write_unlock(); + device_link_add(con, sup_dev, + FW_DEVLINK_FLAGS_PERMISSIVE); ret = -EINVAL; + } goto out; } + /* Supplier that's already initialized without a struct device. */ + if (sup_handle->flags & FWNODE_FLAG_INITIALIZED) + return -EINVAL; + /* * DL_FLAG_SYNC_STATE_ONLY doesn't block probing and supports * cycles. So cycle detection isn't necessary and shouldn't be @@ -1632,7 +1759,7 @@ static void __fw_devlink_link_to_consumers(struct device *dev) con_dev = NULL; } else { own_link = false; - dl_flags = DL_FLAG_SYNC_STATE_ONLY; + dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE; } } @@ -1687,7 +1814,7 @@ static void __fw_devlink_link_to_suppliers(struct device *dev, if (own_link) dl_flags = fw_devlink_get_flags(); else - dl_flags = DL_FLAG_SYNC_STATE_ONLY; + dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE; list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) { int ret; diff --git a/drivers/base/init.c b/drivers/base/init.c index 908e6520e804..a9f57c22fb9e 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -32,6 +32,7 @@ void __init driver_init(void) */ of_core_init(); platform_bus_init(); + auxiliary_bus_init(); cpu_dev_init(); memory_dev_init(); container_dev_init(); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index eef4ffb6122c..f35298425575 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -35,7 +35,7 @@ static const char *const online_type_to_str[] = { [MMOP_ONLINE_MOVABLE] = "online_movable", }; -int memhp_online_type_from_str(const char *str) +int mhp_online_type_from_str(const char *str) { int i; @@ -253,7 +253,7 @@ static int memory_subsys_offline(struct device *dev) static ssize_t state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - const int online_type = memhp_online_type_from_str(buf); + const int online_type = mhp_online_type_from_str(buf); struct memory_block *mem = to_memory_block(dev); int ret; @@ -290,20 +290,20 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr, } /* - * phys_device is a bad name for this. What I really want - * is a way to differentiate between memory ranges that - * are part of physical devices that constitute - * a complete removable unit or fru. - * i.e. do these ranges belong to the same physical device, - * s.t. if I offline all of these sections I can then - * remove the physical device? + * Legacy interface that we cannot remove: s390x exposes the storage increment + * covered by a memory block, allowing for identifying which memory blocks + * comprise a storage increment. Since a memory block spans complete + * storage increments nowadays, this interface is basically unused. Other + * archs never exposed != 0. */ static ssize_t phys_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); + unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); - return sysfs_emit(buf, "%d\n", mem->phys_device); + return sysfs_emit(buf, "%d\n", + arch_get_memory_phys_device(start_pfn)); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -387,19 +387,19 @@ static ssize_t auto_online_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - online_type_to_str[memhp_default_online_type]); + online_type_to_str[mhp_default_online_type]); } static ssize_t auto_online_blocks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - const int online_type = memhp_online_type_from_str(buf); + const int online_type = mhp_online_type_from_str(buf); if (online_type < 0) return -EINVAL; - memhp_default_online_type = online_type; + mhp_default_online_type = online_type; return count; } @@ -488,11 +488,7 @@ static DEVICE_ATTR_WO(soft_offline_page); static DEVICE_ATTR_WO(hard_offline_page); #endif -/* - * Note that phys_device is optional. It is here to allow for - * differentiation between which *physical* devices each - * section belongs to... - */ +/* See phys_device_show(). */ int __weak arch_get_memory_phys_device(unsigned long start_pfn) { return 0; @@ -574,7 +570,6 @@ int register_memory(struct memory_block *memory) static int init_memory_block(unsigned long block_id, unsigned long state) { struct memory_block *mem; - unsigned long start_pfn; int ret = 0; mem = find_memory_block_by_id(block_id); @@ -588,8 +583,6 @@ static int init_memory_block(unsigned long block_id, unsigned long state) mem->start_section_nr = block_id * sections_per_block; mem->state = state; - start_pfn = section_nr_to_pfn(mem->start_section_nr); - mem->phys_device = arch_get_memory_phys_device(start_pfn); mem->nid = NUMA_NO_NODE; ret = register_memory(mem); diff --git a/drivers/base/node.c b/drivers/base/node.c index 04f71c7bc3f8..f449dbb2c746 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -372,14 +372,19 @@ static ssize_t node_read_meminfo(struct device *dev, struct pglist_data *pgdat = NODE_DATA(nid); struct sysinfo i; unsigned long sreclaimable, sunreclaimable; + unsigned long swapcached = 0; si_meminfo_node(&i, nid); sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); +#ifdef CONFIG_SWAP + swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE); +#endif len = sysfs_emit_at(buf, len, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" "Node %d MemUsed: %8lu kB\n" + "Node %d SwapCached: %8lu kB\n" "Node %d Active: %8lu kB\n" "Node %d Inactive: %8lu kB\n" "Node %d Active(anon): %8lu kB\n" @@ -391,6 +396,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), + nid, K(swapcached), nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + node_page_state(pgdat, NR_ACTIVE_FILE)), nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + @@ -461,16 +467,11 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(sunreclaimable) #ifdef CONFIG_TRANSPARENT_HUGEPAGE , - nid, K(node_page_state(pgdat, NR_ANON_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_FILE_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) * - HPAGE_PMD_NR) + nid, K(node_page_state(pgdat, NR_ANON_THPS)), + nid, K(node_page_state(pgdat, NR_SHMEM_THPS)), + nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), + nid, K(node_page_state(pgdat, NR_FILE_THPS)), + nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED)) #endif ); len += hugetlb_report_node_meminfo(buf, len, nid); @@ -519,10 +520,14 @@ static ssize_t node_read_vmstat(struct device *dev, sum_zone_numa_state(nid, i)); #endif - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) - len += sysfs_emit_at(buf, len, "%s %lu\n", - node_stat_name(i), - node_page_state_pages(pgdat, i)); + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + unsigned long pages = node_page_state_pages(pgdat, i); + + if (vmstat_item_print_in_thp(i)) + pages /= HPAGE_PMD_NR; + len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i), + pages); + } return len; } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ac68328b0aca..6e1f8e0b661c 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1463,13 +1463,16 @@ static int platform_remove(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); - int ret = 0; - if (drv->remove) - ret = drv->remove(dev); + if (drv->remove) { + int ret = drv->remove(dev); + + if (ret) + dev_warn(_dev, "remove callback returned a non-zero value. This will be ignored.\n"); + } dev_pm_domain_detach(_dev, true); - return ret; + return 0; } static void platform_shutdown(struct device *_dev) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index aaf6c83b5cf6..78c310d3179d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2196,6 +2196,7 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, cp->node = of_node_get(np); cp->data = data; cp->xlate = xlate; + fwnode_dev_initialized(&np->fwnode, true); mutex_lock(&of_genpd_mutex); list_add(&cp->link, &of_genpd_providers); @@ -2385,6 +2386,7 @@ void of_genpd_del_provider(struct device_node *np) } } + fwnode_dev_initialized(&cp->node->fwnode, false); list_del(&cp->link); of_node_put(cp->node); kfree(cp); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a46a7e30881b..18b82427d0cb 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev) static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { - int retval, idx; bool use_links = dev->power.links_count > 0; + bool get = false; + int retval, idx; + bool put; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); + } else if (!use_links) { + spin_unlock_irq(&dev->power.lock); } else { + get = dev->power.runtime_status == RPM_RESUMING; + spin_unlock_irq(&dev->power.lock); - /* - * Resume suppliers if necessary. - * - * The device's runtime PM status cannot change until this - * routine returns, so it is safe to read the status outside of - * the lock. - */ - if (use_links && dev->power.runtime_status == RPM_RESUMING) { + /* Resume suppliers if necessary. */ + if (get) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); @@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) if (dev->power.irq_safe) { spin_lock(&dev->power.lock); - } else { - /* - * If the device is suspending and the callback has returned - * success, drop the usage counters of the suppliers that have - * been reference counted on its resume. - * - * Do that if resume fails too. - */ - if (use_links - && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval))) { - idx = device_links_read_lock(); + return retval; + } - fail: - rpm_put_suppliers(dev); + spin_lock_irq(&dev->power.lock); - device_links_read_unlock(idx); - } + if (!use_links) + return retval; + + /* + * If the device is suspending and the callback has returned success, + * drop the usage counters of the suppliers that have been reference + * counted on its resume. + * + * Do that if the resume fails too. + */ + put = dev->power.runtime_status == RPM_SUSPENDING && !retval; + if (put) + __update_runtime_status(dev, RPM_SUSPENDED); + else + put = get && retval; + + if (put) { + spin_unlock_irq(&dev->power.lock); + + idx = device_links_read_lock(); + +fail: + rpm_put_suppliers(dev); + + device_links_read_unlock(idx); spin_lock_irq(&dev->power.lock); } diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c index 8ce30650b97c..fe3ac26b66ad 100644 --- a/drivers/base/regmap/regmap-sdw-mbq.c +++ b/drivers/base/regmap/regmap-sdw-mbq.c @@ -15,11 +15,11 @@ static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int va struct sdw_slave *slave = dev_to_sdw_dev(dev); int ret; - ret = sdw_write(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff); + ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff); if (ret < 0) return ret; - return sdw_write(slave, reg, val & 0xff); + return sdw_write_no_pm(slave, reg, val & 0xff); } static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val) @@ -29,11 +29,11 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va int read0; int read1; - read0 = sdw_read(slave, reg); + read0 = sdw_read_no_pm(slave, reg); if (read0 < 0) return read0; - read1 = sdw_read(slave, SDW_SDCA_MBQ_CTL(reg)); + read1 = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg)); if (read1 < 0) return read1; @@ -98,4 +98,4 @@ struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw_mbq); MODULE_DESCRIPTION("Regmap SoundWire MBQ Module"); -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c index c83be26434e7..966de8a136d9 100644 --- a/drivers/base/regmap/regmap-sdw.c +++ b/drivers/base/regmap/regmap-sdw.c @@ -13,7 +13,7 @@ static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val) struct device *dev = context; struct sdw_slave *slave = dev_to_sdw_dev(dev); - return sdw_write(slave, reg, val); + return sdw_write_no_pm(slave, reg, val); } static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) @@ -22,7 +22,7 @@ static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) struct sdw_slave *slave = dev_to_sdw_dev(dev); int read; - read = sdw_read(slave, reg); + read = sdw_read_no_pm(slave, reg); if (read < 0) return read; diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile index 3ca56367c84b..2f15fae8625f 100644 --- a/drivers/base/test/Makefile +++ b/drivers/base/test/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o obj-$(CONFIG_KUNIT_DRIVER_PE_TEST) += property-entry-test.o +CFLAGS_REMOVE_property-entry-test.o += -fplugin-arg-structleak_plugin-byref -fplugin-arg-structleak_plugin-byref-all |