Diffstat (limited to 'drivers/base')
103 files changed, 15377 insertions, 10165 deletions
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index ffcbe2bc460e..1786d87b29e2 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -62,6 +62,17 @@ config DEVTMPFS_MOUNT rescue mode with init=/bin/sh, even when the /dev directory on the rootfs is completely empty. +config DEVTMPFS_SAFE + bool "Use nosuid,noexec mount options on devtmpfs" + depends on DEVTMPFS + help + This instructs the kernel to include the MS_NOEXEC and MS_NOSUID mount + flags when mounting devtmpfs. + + Notice: If enabled, things like /dev/mem cannot be mmapped + with the PROT_EXEC flag. This can break, for example, non-KMS + video drivers. + config STANDALONE bool "Select only drivers that don't need compile-time external firmware" default y @@ -156,6 +167,12 @@ config PM_QOS_KUNIT_TEST depends on KUNIT=y default KUNIT_ALL_TESTS +config PM_RUNTIME_KUNIT_TEST + tristate "KUnit Tests for runtime PM" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on PM + default KUNIT_ALL_TESTS + config HMEM_REPORTING bool default n @@ -215,8 +232,21 @@ config GENERIC_ARCH_TOPOLOGY config GENERIC_ARCH_NUMA bool + select NUMA_MEMBLKS help Enable support for generic NUMA implementation. Currently, RISC-V and ARM64 use it. +config FW_DEVLINK_SYNC_STATE_TIMEOUT + bool "sync_state() behavior defaults to timeout instead of strict" + help + This is build time equivalent of adding kernel command line parameter + "fw_devlink.sync_state=timeout". Give up waiting on consumers and + call sync_state() on any devices that haven't yet received their + sync_state() calls after deferred_probe_timeout has expired or by + late_initcall() if !CONFIG_MODULES. You should almost always want to + select N here unless you have already successfully tested with the + command line option on every system/board your kernel is expected to + work on. 
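Illustration (not part of this series): the DEVTMPFS_SAFE help text above corresponds to OR-ing MS_NOSUID and MS_NOEXEC into the devtmpfs mount flags. A minimal sketch, with an invented macro name, of how the option can be wired into drivers/base/devtmpfs.c:

	/* sketch only -- the macro name is illustrative */
	#ifdef CONFIG_DEVTMPFS_SAFE
	#define DEVTMPFS_SAFE_MFLAGS	(MS_NOSUID | MS_NOEXEC)
	#else
	#define DEVTMPFS_SAFE_MFLAGS	0
	#endif
	/* ...and OR DEVTMPFS_SAFE_MFLAGS into the flags used when mounting devtmpfs */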
+ endmenu diff --git a/drivers/base/Makefile b/drivers/base/Makefile index ef8e44a7d288..8074a10183dc 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,25 +6,27 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ topology.o container.o property.o cacheinfo.o \ - swnode.o + swnode.o faux.o obj-$(CONFIG_AUXILIARY_BUS) += auxiliary.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-y += power/ obj-$(CONFIG_ISA_BUS_API) += isa.o obj-y += firmware_loader/ obj-$(CONFIG_NUMA) += node.o -obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o +obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_AUXILIARY_BUS) += auxiliary_sysfs.o endif obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o obj-$(CONFIG_REGMAP) += regmap/ obj-$(CONFIG_SOC_BUS) += soc.o obj-$(CONFIG_PINCTRL) += pinctrl.o obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o -obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o +obj-$(CONFIG_GENERIC_MSI_IRQ) += platform-msi.o obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o +obj-$(CONFIG_ACPI) += physical_location.o obj-y += test/ diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 4cc4e117727d..c99f2ab105e5 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -12,16 +12,12 @@ #include <linux/memblock.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/numa_memblks.h> #include <asm/sections.h> -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); -nodemask_t numa_nodes_parsed __initdata; static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -static int numa_distance_cnt; -static u8 *numa_distance; bool numa_off; static __init int numa_parse_early_param(char *opt) @@ -30,6 +26,8 @@ static __init int numa_parse_early_param(char *opt) return -EINVAL; if (str_has_prefix(opt, "off")) numa_off = true; + if (!strncmp(opt, "fake=", 5)) + return numa_emu_cmdline(opt + 5); return 0; } @@ -61,6 +59,7 @@ EXPORT_SYMBOL(cpumask_of_node); #endif +#ifndef CONFIG_NUMA_EMU static void numa_update_cpu(unsigned int cpu, bool remove) { int nid = cpu_to_node(cpu); @@ -83,6 +82,7 @@ void numa_remove_cpu(unsigned int cpu) { numa_update_cpu(cpu, true); } +#endif void numa_clear_node(unsigned int cpu) { @@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static int __init early_cpu_to_node(int cpu) +int early_cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } @@ -154,36 +154,34 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); } -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, - size_t align) -{ - int nid = early_cpu_to_node(cpu); - - return memblock_alloc_try_nid(size, align, - __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); -} - -static void __init pcpu_fc_free(void *ptr, size_t size) -{ - memblock_free_early(__pa(ptr), size); -} - void __init setup_per_cpu_areas(void) { unsigned long delta; unsigned int cpu; - int rc; + int rc = -EINVAL; + + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. 
+ */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + early_cpu_to_node); +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + if (rc < 0) + pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); +#endif + } - /* - * Always reserve area for module percpu variables. That's - * what the legacy allocator did. - */ - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, - pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + if (rc < 0) + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node); +#endif if (rc < 0) - panic("Failed to initialize percpu areas."); + panic("Failed to initialize percpu areas (err=%d).", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) @@ -191,179 +189,28 @@ void __init setup_per_cpu_areas(void) } #endif -/** - * numa_add_memblk() - Set node id to memblk - * @nid: NUMA node ID of the new memblk - * @start: Start address of the new memblk - * @end: End address of the new memblk - * - * RETURNS: - * 0 on success, -errno on failure. - */ -int __init numa_add_memblk(int nid, u64 start, u64 end) -{ - int ret; - - ret = memblock_set_node(start, (end - start), &memblock.memory, nid); - if (ret < 0) { - pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", - start, (end - 1), nid); - return ret; - } - - node_set(nid, numa_nodes_parsed); - return ret; -} - /* * Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); - u64 nd_pa; - void *nd; - int tnid; - if (start_pfn >= end_pfn) pr_info("Initmem setup node %d [<memory-less node>]\n", nid); - nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); - if (!nd_pa) - panic("Cannot allocate %zu bytes for node %d data\n", - nd_size, nid); - - nd = __va(nd_pa); + alloc_node_data(nid); - /* report and initialize */ - pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", - nd_pa, nd_pa + nd_size - 1); - tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); - if (tnid != nid) - pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); - - node_data[nid] = nd; - memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); NODE_DATA(nid)->node_id = nid; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; } -/* - * numa_free_distance - * - * The current table is freed. - */ -void __init numa_free_distance(void) -{ - size_t size; - - if (!numa_distance) - return; - - size = numa_distance_cnt * numa_distance_cnt * - sizeof(numa_distance[0]); - - memblock_free(__pa(numa_distance), size); - numa_distance_cnt = 0; - numa_distance = NULL; -} - -/* - * Create a new NUMA distance table. - */ -static int __init numa_alloc_distance(void) -{ - size_t size; - u64 phys; - int i, j; - - size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn), - size, PAGE_SIZE); - if (WARN_ON(!phys)) - return -ENOMEM; - - memblock_reserve(phys, size); - - numa_distance = __va(phys); - numa_distance_cnt = nr_node_ids; - - /* fill with the default distances */ - for (i = 0; i < numa_distance_cnt; i++) - for (j = 0; j < numa_distance_cnt; j++) - numa_distance[i * numa_distance_cnt + j] = i == j ? 
- LOCAL_DISTANCE : REMOTE_DISTANCE; - - pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt); - - return 0; -} - -/** - * numa_set_distance() - Set inter node NUMA distance from node to node. - * @from: the 'from' node to set distance - * @to: the 'to' node to set distance - * @distance: NUMA distance - * - * Set the distance from node @from to @to to @distance. - * If distance table doesn't exist, a warning is printed. - * - * If @from or @to is higher than the highest known node or lower than zero - * or @distance doesn't make sense, the call is ignored. - */ -void __init numa_set_distance(int from, int to, int distance) -{ - if (!numa_distance) { - pr_warn_once("Warning: distance table not allocated yet\n"); - return; - } - - if (from >= numa_distance_cnt || to >= numa_distance_cnt || - from < 0 || to < 0) { - pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - if ((u8)distance != distance || - (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - numa_distance[from * numa_distance_cnt + to] = distance; -} - -/* - * Return NUMA distance @from to @to - */ -int __node_distance(int from, int to) -{ - if (from >= numa_distance_cnt || to >= numa_distance_cnt) - return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return numa_distance[from * numa_distance_cnt + to]; -} -EXPORT_SYMBOL(__node_distance); - static int __init numa_register_nodes(void) { int nid; - struct memblock_region *mblk; - - /* Check that valid nid is set to memblks */ - for_each_mem_region(mblk) { - int mblk_nid = memblock_get_region_node(mblk); - phys_addr_t start = mblk->base; - phys_addr_t end = mblk->base + mblk->size - 1; - - if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { - pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n", - mblk_nid, &start, &end); - return -EINVAL; - } - } + + /* Check the validity of the memblock/node mapping */ + if (!memblock_validate_numa_coverage(0)) + return -EINVAL; /* Finally register nodes. */ for_each_node_mask(nid, numa_nodes_parsed) { @@ -388,11 +235,7 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(node_possible_map); nodes_clear(node_online_map); - ret = numa_alloc_distance(); - if (ret < 0) - return ret; - - ret = init_func(); + ret = numa_memblks_init(init_func, /* memblock_force_top_down */ false); if (ret < 0) goto out_free_distance; @@ -410,7 +253,7 @@ static int __init numa_init(int (*init_func)(void)) return 0; out_free_distance: - numa_free_distance(); + numa_reset_distance(); return ret; } @@ -440,6 +283,7 @@ static int __init dummy_numa_init(void) pr_err("NUMA init failed\n"); return ret; } + node_set(0, numa_nodes_parsed); numa_off = true; return 0; @@ -452,7 +296,7 @@ static int __init arch_acpi_numa_init(void) ret = acpi_numa_init(); if (ret) { - pr_info("Failed to initialise from firmware\n"); + pr_debug("Failed to initialise from firmware\n"); return ret; } @@ -482,3 +326,54 @@ void __init arch_numa_init(void) numa_init(dummy_numa_init); } + +#ifdef CONFIG_NUMA_EMU +void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, + unsigned int nr_emu_nids) +{ + int i, j; + + /* + * Transform cpu_to_node_map table to use emulated nids by + * reverse-mapping phys_nid. The maps should always exist but fall + * back to zero just in case. 
+ */ + for (i = 0; i < ARRAY_SIZE(cpu_to_node_map); i++) { + if (cpu_to_node_map[i] == NUMA_NO_NODE) + continue; + for (j = 0; j < nr_emu_nids; j++) + if (cpu_to_node_map[i] == emu_nid_to_phys[j]) + break; + cpu_to_node_map[i] = j < nr_emu_nids ? j : 0; + } +} + +u64 __init numa_emu_dma_end(void) +{ + return memblock_start_of_DRAM() + SZ_4G; +} + +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable) +{ + struct cpumask *mask; + + if (node == NUMA_NO_NODE) + return; + + mask = node_to_cpumask_map[node]; + if (!cpumask_available(mask)) { + pr_err("node_to_cpumask_map[%i] NULL\n", node); + dump_stack(); + return; + } + + if (enable) + cpumask_set_cpu(cpu, mask); + else + cpumask_clear_cpu(cpu, mask); + + pr_debug("%s cpu %d node %d: mask now %*pbl\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", + cpu, node, cpumask_pr_args(mask)); +} +#endif /* CONFIG_NUMA_EMU */ diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 921312a8d957..84ec92bff642 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -7,24 +7,30 @@ */ #include <linux/acpi.h> +#include <linux/cacheinfo.h> +#include <linux/cleanup.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpu_smt.h> #include <linux/device.h> #include <linux/of.h> #include <linux/slab.h> -#include <linux/string.h> #include <linux/sched/topology.h> #include <linux/cpuset.h> #include <linux/cpumask.h> #include <linux/init.h> -#include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/sched.h> -#include <linux/smp.h> +#include <linux/units.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/hw_pressure.h> static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0; +EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) { @@ -148,57 +154,54 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, per_cpu(arch_freq_scale, i) = scale; } -DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +DEFINE_PER_CPU(unsigned long, hw_pressure); -void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) +/** + * topology_update_hw_pressure() - Update HW pressure for CPUs + * @cpus : The related CPUs for which capacity has been reduced + * @capped_freq : The maximum allowed frequency that CPUs can run at + * + * Update the value of HW pressure for all @cpus in the mask. The + * cpumask should include all (online+offline) affected CPUs, to avoid + * operating on stale data when hot-plug is used for some CPUs. The + * @capped_freq reflects the currently allowed max CPUs frequency due to + * HW capping. It might be also a boost frequency value, which is bigger + * than the internal 'capacity_freq_ref' max frequency. In such case the + * pressure value should simply be removed, since this is an indication that + * there is no HW throttling. The @capped_freq must be provided in kHz. 
+ */ +void topology_update_hw_pressure(const struct cpumask *cpus, + unsigned long capped_freq) { - per_cpu(cpu_scale, cpu) = capacity; -} + unsigned long max_capacity, capacity, pressure; + u32 max_freq; + int cpu; -DEFINE_PER_CPU(unsigned long, thermal_pressure); + cpu = cpumask_first(cpus); + max_capacity = arch_scale_cpu_capacity(cpu); + max_freq = arch_scale_freq_ref(cpu); -void topology_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure) -{ - int cpu; + /* + * Handle properly the boost frequencies, which should simply clean + * the HW pressure value. + */ + if (max_freq <= capped_freq) + capacity = max_capacity; + else + capacity = mult_frac(max_capacity, capped_freq, max_freq); - for_each_cpu(cpu, cpus) - WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -} + pressure = max_capacity - capacity; -static ssize_t cpu_capacity_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, dev); + trace_hw_pressure_update(cpu, pressure); - return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); + for_each_cpu(cpu, cpus) + WRITE_ONCE(per_cpu(hw_pressure, cpu), pressure); } +EXPORT_SYMBOL_GPL(topology_update_hw_pressure); static void update_topology_flags_workfn(struct work_struct *work); static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); -static DEVICE_ATTR_RO(cpu_capacity); - -static int register_cpu_capacity_sysctl(void) -{ - int i; - struct device *cpu; - - for_each_possible_cpu(i) { - cpu = get_cpu_device(i); - if (!cpu) { - pr_err("%s: too early to get CPU%d device!\n", - __func__, i); - continue; - } - device_create_file(cpu, &dev_attr_cpu_capacity); - } - - return 0; -} -subsys_initcall(register_cpu_capacity_sysctl); - static int update_topology; int topology_update_cpu_topology(void) @@ -218,7 +221,6 @@ static void update_topology_flags_workfn(struct work_struct *work) update_topology = 0; } -static DEFINE_PER_CPU(u32, freq_factor) = 1; static u32 *raw_capacity; static int free_raw_capacity(void) @@ -240,13 +242,15 @@ void topology_normalize_cpu_scale(void) capacity_scale = 1; for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity_scale = max(capacity, capacity_scale); } pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); topology_set_cpu_scale(cpu, capacity); @@ -282,15 +286,15 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) cpu_node, raw_capacity[cpu]); /* - * Update freq_factor for calculating early boot cpu capacities. + * Update capacity_freq_ref for calculating early boot CPU capacities. * For non-clk CPU DVFS mechanism, there's no way to get the * frequency value now, assuming they are running at the same - * frequency (by keeping the initial freq_factor value). + * frequency (by keeping the initial capacity_freq_ref value). 
*/ cpu_clk = of_clk_get(cpu_node, 0); - if (!PTR_ERR_OR_ZERO(cpu_clk)) { - per_cpu(freq_factor, cpu) = - clk_get_rate(cpu_clk) / 1000; + if (!IS_ERR_OR_NULL(cpu_clk)) { + per_cpu(capacity_freq_ref, cpu) = + clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); } } else { @@ -306,6 +310,70 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) return !ret; } +void __weak freq_inv_set_max_ratio(int cpu, u64 max_rate) +{ +} + +#ifdef CONFIG_ACPI_CPPC_LIB +#include <acpi/cppc_acpi.h> + +static inline void topology_init_cpu_capacity_cppc(void) +{ + u64 capacity, capacity_scale = 0; + struct cppc_perf_caps perf_caps; + int cpu; + + if (likely(!acpi_cpc_valid())) + return; + + raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity), + GFP_KERNEL); + if (!raw_capacity) + return; + + for_each_possible_cpu(cpu) { + if (!cppc_get_perf_caps(cpu, &perf_caps) && + (perf_caps.highest_perf >= perf_caps.nominal_perf) && + (perf_caps.highest_perf >= perf_caps.lowest_perf)) { + raw_capacity[cpu] = perf_caps.highest_perf; + capacity_scale = max_t(u64, capacity_scale, raw_capacity[cpu]); + + per_cpu(capacity_freq_ref, cpu) = cppc_perf_to_khz(&perf_caps, raw_capacity[cpu]); + + pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n", + cpu, raw_capacity[cpu]); + continue; + } + + pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu); + pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); + goto exit; + } + + for_each_possible_cpu(cpu) { + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + + capacity = raw_capacity[cpu]; + capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, + capacity_scale); + topology_set_cpu_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, topology_get_cpu_scale(cpu)); + } + + schedule_work(&update_topology_flags_work); + pr_debug("cpu_capacity: cpu_capacity initialization done\n"); + +exit: + free_raw_capacity(); +} +void acpi_processor_init_invariance_cppc(void) +{ + topology_init_cpu_capacity_cppc(); +} +#endif + #ifdef CONFIG_CPU_FREQ static cpumask_var_t cpus_to_visit; static void parsing_done_workfn(struct work_struct *work); @@ -319,9 +387,6 @@ init_cpu_capacity_callback(struct notifier_block *nb, struct cpufreq_policy *policy = data; int cpu; - if (!raw_capacity) - return 0; - if (val != CPUFREQ_CREATE_POLICY) return 0; @@ -331,13 +396,18 @@ init_cpu_capacity_callback(struct notifier_block *nb, cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); - for_each_cpu(cpu, policy->related_cpus) - per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000; + for_each_cpu(cpu, policy->related_cpus) { + per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq; + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + } if (cpumask_empty(cpus_to_visit)) { - topology_normalize_cpu_scale(); - schedule_work(&update_topology_flags_work); - free_raw_capacity(); + if (raw_capacity) { + topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); + free_raw_capacity(); + } pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); } @@ -354,11 +424,10 @@ static int __init register_cpufreq_notifier(void) int ret; /* - * on ACPI-based systems we need to use the default cpu capacity - * until we have the necessary code to parse the cpu capacity, so - * skip registering cpufreq notifier. 
+ * On ACPI-based systems skip registering cpufreq notifier as cpufreq + * information is not needed for cpu capacity initialization. */ - if (!acpi_disabled || !raw_capacity) + if (!acpi_disabled) return -EINVAL; if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) @@ -388,6 +457,10 @@ core_initcall(free_raw_capacity); #endif #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) + +/* Used to enable the SMT control */ +static unsigned int max_smt_thread_num = 1; + /* * This function returns the logic cpu number of the node. * There are basically three kinds of return values: @@ -400,10 +473,10 @@ core_initcall(free_raw_capacity); */ static int __init get_cpu_for_node(struct device_node *node) { - struct device_node *cpu_node; int cpu; + struct device_node *cpu_node __free(device_node) = + of_parse_phandle(node, "cpu", 0); - cpu_node = of_parse_phandle(node, "cpu", 0); if (!cpu_node) return -1; @@ -414,38 +487,40 @@ static int __init get_cpu_for_node(struct device_node *node) pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n", cpu_node, cpumask_pr_args(cpu_possible_mask)); - of_node_put(cpu_node); return cpu; } static int __init parse_core(struct device_node *core, int package_id, - int core_id) + int cluster_id, int core_id) { char name[20]; bool leaf = true; int i = 0; int cpu; - struct device_node *t; do { snprintf(name, sizeof(name), "thread%d", i); - t = of_get_child_by_name(core, name); - if (t) { - leaf = false; - cpu = get_cpu_for_node(t); - if (cpu >= 0) { - cpu_topology[cpu].package_id = package_id; - cpu_topology[cpu].core_id = core_id; - cpu_topology[cpu].thread_id = i; - } else if (cpu != -ENODEV) { - pr_err("%pOF: Can't get CPU for thread\n", t); - of_node_put(t); - return -EINVAL; - } - of_node_put(t); + struct device_node *t __free(device_node) = + of_get_child_by_name(core, name); + + if (!t) + break; + + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else if (cpu != -ENODEV) { + pr_err("%pOF: Can't get CPU for thread\n", t); + return -EINVAL; } i++; - } while (t); + } while (1); + + max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i); cpu = get_cpu_for_node(core); if (cpu >= 0) { @@ -456,6 +531,7 @@ static int __init parse_core(struct device_node *core, int package_id, } cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; } else if (leaf && cpu != -ENODEV) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -465,13 +541,12 @@ static int __init parse_core(struct device_node *core, int package_id, return 0; } -static int __init parse_cluster(struct device_node *cluster, int depth) +static int __init parse_cluster(struct device_node *cluster, int package_id, + int cluster_id, int depth) { char name[20]; bool leaf = true; bool has_cores = false; - struct device_node *c; - static int package_id __initdata; int core_id = 0; int i, ret; @@ -483,63 +558,103 @@ static int __init parse_cluster(struct device_node *cluster, int depth) i = 0; do { snprintf(name, sizeof(name), "cluster%d", i); - c = of_get_child_by_name(cluster, name); - if (c) { - leaf = false; - ret = parse_cluster(c, depth + 1); - of_node_put(c); - if (ret != 0) - return ret; - } + struct device_node *c __free(device_node) = + of_get_child_by_name(cluster, name); + + if (!c) + break; + + leaf = false; + ret = parse_cluster(c, 
package_id, i, depth + 1); + if (depth > 0) + pr_warn("Topology for clusters of clusters not yet supported\n"); + if (ret != 0) + return ret; i++; - } while (c); + } while (1); /* Now check for cores */ i = 0; do { snprintf(name, sizeof(name), "core%d", i); - c = of_get_child_by_name(cluster, name); - if (c) { - has_cores = true; - - if (depth == 0) { - pr_err("%pOF: cpu-map children should be clusters\n", - c); - of_node_put(c); - return -EINVAL; - } + struct device_node *c __free(device_node) = + of_get_child_by_name(cluster, name); - if (leaf) { - ret = parse_core(c, package_id, core_id++); - } else { - pr_err("%pOF: Non-leaf cluster with core %s\n", - cluster, name); - ret = -EINVAL; - } + if (!c) + break; + + has_cores = true; + + if (depth == 0) { + pr_err("%pOF: cpu-map children should be clusters\n", c); + return -EINVAL; + } - of_node_put(c); + if (leaf) { + ret = parse_core(c, package_id, cluster_id, core_id++); if (ret != 0) return ret; + } else { + pr_err("%pOF: Non-leaf cluster with core %s\n", + cluster, name); + return -EINVAL; } + i++; - } while (c); + } while (1); if (leaf && !has_cores) pr_warn("%pOF: empty cluster\n", cluster); - if (leaf) + return 0; +} + +static int __init parse_socket(struct device_node *socket) +{ + char name[20]; + bool has_socket = false; + int package_id = 0, ret; + + do { + snprintf(name, sizeof(name), "socket%d", package_id); + struct device_node *c __free(device_node) = + of_get_child_by_name(socket, name); + + if (!c) + break; + + has_socket = true; + ret = parse_cluster(c, package_id, -1, 0); + if (ret != 0) + return ret; + package_id++; + } while (1); - return 0; + if (!has_socket) + ret = parse_cluster(socket, 0, -1, 0); + + /* + * Reset the max_smt_thread_num to 1 on failure. Since on failure + * we need to notify the framework the SMT is not supported, but + * max_smt_thread_num can be initialized to the SMT thread number + * of the cores which are successfully parsed. + */ + if (ret) + max_smt_thread_num = 1; + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + + return ret; } static int __init parse_dt_topology(void) { - struct device_node *cn, *map; int ret = 0; int cpu; + struct device_node *cn __free(device_node) = + of_find_node_by_path("/cpus"); - cn = of_find_node_by_path("/cpus"); if (!cn) { pr_err("No CPU information found in DT\n"); return 0; @@ -549,13 +664,15 @@ static int __init parse_dt_topology(void) * When topology is provided cpu-map is essentially a root * cluster with restricted subnodes. */ - map = of_get_child_by_name(cn, "cpu-map"); + struct device_node *map __free(device_node) = + of_get_child_by_name(cn, "cpu-map"); + if (!map) - goto out; + return ret; - ret = parse_cluster(map, 0); + ret = parse_socket(map); if (ret != 0) - goto out_map; + return ret; topology_normalize_cpu_scale(); @@ -564,13 +681,10 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. 
*/ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].package_id == -1) - ret = -EINVAL; + if (cpu_topology[cpu].package_id < 0) { + return -EINVAL; + } -out_map: - of_node_put(map); -out: - of_node_put(cn); return ret; } #endif @@ -590,24 +704,51 @@ const struct cpumask *cpu_coregroup_mask(int cpu) /* not numa in package, lets use the package siblings */ core_mask = &cpu_topology[cpu].core_sibling; } - if (cpu_topology[cpu].llc_id != -1) { + + if (last_level_cache_is_valid(cpu)) { if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) core_mask = &cpu_topology[cpu].llc_sibling; } + /* + * For systems with no shared cpu-side LLC but with clusters defined, + * extend core_mask to cluster_siblings. The sched domain builder will + * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled. + */ + if (IS_ENABLED(CONFIG_SCHED_CLUSTER) && + cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling)) + core_mask = &cpu_topology[cpu].cluster_sibling; + return core_mask; } +const struct cpumask *cpu_clustergroup_mask(int cpu) +{ + /* + * Forbid cpu_clustergroup_mask() to span more or the same CPUs as + * cpu_coregroup_mask(). + */ + if (cpumask_subset(cpu_coregroup_mask(cpu), + &cpu_topology[cpu].cluster_sibling)) + return topology_sibling_cpumask(cpu); + + return &cpu_topology[cpu].cluster_sibling; +} + void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; - int cpu; + int cpu, ret; + + ret = detect_cache_attributes(cpuid); + if (ret && ret != -ENOENT) + pr_info("Early cacheinfo allocation failed, ret = %d\n", ret); /* update core and thread sibling masks */ for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->llc_id == cpu_topo->llc_id) { + if (last_level_cache_is_shared(cpu, cpuid)) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } @@ -618,6 +759,14 @@ void update_siblings_masks(unsigned int cpuid) cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + if (cpuid_topo->cluster_id >= 0) { + cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling); + } + if (cpuid_topo->core_id != cpu_topo->core_id) continue; @@ -633,6 +782,9 @@ static void clear_cpu_topology(int cpu) cpumask_clear(&cpu_topo->llc_sibling); cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + cpumask_clear(&cpu_topo->cluster_sibling); + cpumask_set_cpu(cpu, &cpu_topo->cluster_sibling); + cpumask_clear(&cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); @@ -648,8 +800,8 @@ void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = -1; + cpu_topo->cluster_id = -1; cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; clear_cpu_topology(cpu); } @@ -663,29 +815,158 @@ void remove_cpu_topology(unsigned int cpu) cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); for_each_cpu(sibling, topology_sibling_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_cluster_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling)); for_each_cpu(sibling, topology_llc_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); clear_cpu_topology(cpu); } +#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) +struct cpu_smt_info { + unsigned int thread_num; + int core_id; +}; + +static 
bool __init acpi_cpu_is_threaded(int cpu) +{ + int is_threaded = acpi_pptt_cpu_is_thread(cpu); + + /* + * if the PPTT doesn't have thread information, check for architecture + * specific fallback if available + */ + if (is_threaded < 0) + is_threaded = arch_cpu_is_threaded(); + + return !!is_threaded; +} + +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array. + */ __weak int __init parse_acpi_topology(void) { + unsigned int max_smt_thread_num = 1; + struct cpu_smt_info *entry; + struct xarray hetero_cpu; + unsigned long hetero_id; + int cpu, topology_id; + + if (acpi_disabled) + return 0; + + xa_init(&hetero_cpu); + + for_each_possible_cpu(cpu) { + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (acpi_cpu_is_threaded(cpu)) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + + /* + * In the PPTT, CPUs below a node with the 'identical + * implementation' flag have the same number of threads. + * Count the number of threads for only one CPU (i.e. + * one core_id) among those with the same hetero_id. + * See the comment of find_acpi_cpu_topology_hetero_id() + * for more details. + * + * One entry is created for each node having: + * - the 'identical implementation' flag + * - its parent not having the flag + */ + hetero_id = find_acpi_cpu_topology_hetero_id(cpu); + entry = xa_load(&hetero_cpu, hetero_id); + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + WARN_ON_ONCE(!entry); + + if (entry) { + entry->core_id = topology_id; + entry->thread_num = 1; + xa_store(&hetero_cpu, hetero_id, + entry, GFP_KERNEL); + } + } else if (entry->core_id == topology_id) { + entry->thread_num++; + } + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_cluster(cpu); + cpu_topology[cpu].cluster_id = topology_id; + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + } + + /* + * This is a short loop since the number of XArray elements is the + * number of heterogeneous CPU clusters. On a homogeneous system + * there's only one entry in the XArray. + */ + xa_for_each(&hetero_cpu, hetero_id, entry) { + max_smt_thread_num = max(max_smt_thread_num, entry->thread_num); + xa_erase(&hetero_cpu, hetero_id); + kfree(entry); + } + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + xa_destroy(&hetero_cpu); return 0; } -#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) void __init init_cpu_topology(void) { + int cpu, ret; + reset_cpu_topology(); + ret = parse_acpi_topology(); + if (!ret) + ret = of_have_populated_dt() && parse_dt_topology(); - /* - * Discard anything that was parsed if we hit an error so we - * don't use partial information. - */ - if (parse_acpi_topology()) - reset_cpu_topology(); - else if (of_have_populated_dt() && parse_dt_topology()) + if (ret) { + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. But do not return yet to give + * arch-specific early cache level detection a chance to run. 
+ */ reset_cpu_topology(); + } + + for_each_possible_cpu(cpu) { + ret = fetch_cache_info(cpu); + if (!ret) + continue; + else if (ret != -ENOENT) + pr_err("Early cacheinfo failed, ret = %d\n", ret); + return; + } +} + +void store_cpu_topology(unsigned int cpuid) +{ + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + + if (cpuid_topo->package_id != -1) + goto topology_populated; + + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); + + pr_debug("CPU%u: package %d core %d thread %d\n", + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, + cpuid_topo->thread_id); + +topology_populated: + update_siblings_masks(cpuid); } #endif diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index 01ef796c2055..b6f941a6ab69 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -346,8 +346,7 @@ attribute_container_device_trigger_safe(struct device *dev, * @fn: the function to execute for each classdev. * * This function is for executing a trigger when you need to know both - * the container and the classdev. If you only care about the - * container, then use attribute_container_trigger() instead. + * the container and the classdev. */ void attribute_container_device_trigger(struct device *dev, @@ -379,33 +378,6 @@ attribute_container_device_trigger(struct device *dev, } /** - * attribute_container_trigger - trigger a function for each matching container - * - * @dev: The generic device to activate the trigger for - * @fn: the function to trigger - * - * This routine triggers a function that only needs to know the - * matching containers (not the classdev) associated with a device. - * It is more lightweight than attribute_container_device_trigger, so - * should be used in preference unless the triggering function - * actually needs to know the classdev. - */ -void -attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)) -{ - struct attribute_container *cont; - - mutex_lock(&attribute_container_mutex); - list_for_each_entry(cont, &attribute_container_list, node) { - if (cont->match(cont, dev)) - fn(cont, dev); - } - mutex_unlock(&attribute_container_mutex); -} - -/** * attribute_container_add_attrs - add attributes * * @classdev: The class device @@ -459,24 +431,6 @@ attribute_container_add_class_device(struct device *classdev) } /** - * attribute_container_add_class_device_adapter - simple adapter for triggers - * - * @cont: the container to register. - * @dev: the generic device to activate the trigger for - * @classdev: the class device to add - * - * This function is identical to attribute_container_add_class_device except - * that it is designed to be called from the triggers - */ -int -attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev) -{ - return attribute_container_add_class_device(classdev); -} - -/** * attribute_container_remove_attrs - remove any attribute files * * @classdev: The class device to remove the files from diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index adc199dfba3c..04bdbff4dbe5 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -17,34 +17,186 @@ #include <linux/auxiliary_bus.h> #include "base.h" +/** + * DOC: PURPOSE + * + * In some subsystems, the functionality of the core device (PCI/ACPI/other) is + * too complex for a single device to be managed by a monolithic driver (e.g. 
+ * Sound Open Firmware), multiple devices might implement a common intersection + * of functionality (e.g. NICs + RDMA), or a driver may want to export an + * interface for another subsystem to drive (e.g. SIOV Physical Function export + * Virtual Function management). A split of the functionality into child- + * devices representing sub-domains of functionality makes it possible to + * compartmentalize, layer, and distribute domain-specific concerns via a Linux + * device-driver model. + * + * An example for this kind of requirement is the audio subsystem where a + * single IP is handling multiple entities such as HDMI, Soundwire, local + * devices such as mics/speakers etc. The split for the core's functionality + * can be arbitrary or be defined by the DSP firmware topology and include + * hooks for test/debug. This allows for the audio core device to be minimal + * and focused on hardware-specific control and communication. + * + * Each auxiliary_device represents a part of its parent functionality. The + * generic behavior can be extended and specialized as needed by encapsulating + * an auxiliary_device within other domain-specific structures and the use of + * .ops callbacks. Devices on the auxiliary bus do not share any structures and + * the use of a communication channel with the parent is domain-specific. + * + * Note that ops are intended as a way to augment instance behavior within a + * class of auxiliary devices, it is not the mechanism for exporting common + * infrastructure from the parent. Consider EXPORT_SYMBOL_NS() to convey + * infrastructure from the parent module to the auxiliary module(s). + */ + +/** + * DOC: USAGE + * + * The auxiliary bus is to be used when a driver and one or more kernel + * modules, who share a common header file with the driver, need a mechanism to + * connect and provide access to a shared object allocated by the + * auxiliary_device's registering driver. The registering driver for the + * auxiliary_device(s) and the kernel module(s) registering auxiliary_drivers + * can be from the same subsystem, or from multiple subsystems. + * + * The emphasis here is on a common generic interface that keeps subsystem + * customization out of the bus infrastructure. + * + * One example is a PCI network device that is RDMA-capable and exports a child + * device to be driven by an auxiliary_driver in the RDMA subsystem. The PCI + * driver allocates and registers an auxiliary_device for each physical + * function on the NIC. The RDMA driver registers an auxiliary_driver that + * claims each of these auxiliary_devices. This conveys data/ops published by + * the parent PCI device/driver to the RDMA auxiliary_driver. + * + * Another use case is for the PCI device to be split out into multiple sub + * functions. For each sub function an auxiliary_device is created. A PCI sub + * function driver binds to such devices that creates its own one or more class + * devices. A PCI sub function auxiliary device is likely to be contained in a + * struct with additional attributes such as user defined sub function number + * and optional attributes such as resources and a link to the parent device. + * These attributes could be used by systemd/udev; and hence should be + * initialized before a driver binds to an auxiliary_device. + * + * A key requirement for utilizing the auxiliary bus is that there is no + * dependency on a physical bus, device, register accesses or regmap support. 
+ * These individual devices split from the core cannot live on the platform bus + * as they are not physical devices that are controlled by DT/ACPI. The same + * argument applies for not using MFD in this scenario as MFD relies on + * individual function devices being physical devices. + */ + +/** + * DOC: EXAMPLE + * + * Auxiliary devices are created and registered by a subsystem-level core + * device that needs to break up its functionality into smaller fragments. One + * way to extend the scope of an auxiliary_device is to encapsulate it within a + * domain-specific structure defined by the parent device. This structure + * contains the auxiliary_device and any associated shared data/callbacks + * needed to establish the connection with the parent. + * + * An example is: + * + * .. code-block:: c + * + * struct foo { + * struct auxiliary_device auxdev; + * void (*connect)(struct auxiliary_device *auxdev); + * void (*disconnect)(struct auxiliary_device *auxdev); + * void *data; + * }; + * + * The parent device then registers the auxiliary_device by calling + * auxiliary_device_init(), and then auxiliary_device_add(), with the pointer + * to the auxdev member of the above structure. The parent provides a name for + * the auxiliary_device that, combined with the parent's KBUILD_MODNAME, + * creates a match_name that is be used for matching and binding with a driver. + * + * Whenever an auxiliary_driver is registered, based on the match_name, the + * auxiliary_driver's probe() is invoked for the matching devices. The + * auxiliary_driver can also be encapsulated inside custom drivers that make + * the core device's functionality extensible by adding additional + * domain-specific ops as follows: + * + * .. code-block:: c + * + * struct my_ops { + * void (*send)(struct auxiliary_device *auxdev); + * void (*receive)(struct auxiliary_device *auxdev); + * }; + * + * + * struct my_driver { + * struct auxiliary_driver auxiliary_drv; + * const struct my_ops ops; + * }; + * + * An example of this type of usage is: + * + * .. code-block:: c + * + * const struct auxiliary_device_id my_auxiliary_id_table[] = { + * { .name = "foo_mod.foo_dev" }, + * { }, + * }; + * + * const struct my_ops my_custom_ops = { + * .send = my_tx, + * .receive = my_rx, + * }; + * + * const struct my_driver my_drv = { + * .auxiliary_drv = { + * .name = "myauxiliarydrv", + * .id_table = my_auxiliary_id_table, + * .probe = my_probe, + * .remove = my_remove, + * .shutdown = my_shutdown, + * }, + * .ops = my_custom_ops, + * }; + * + * Please note that such custom ops approach is valid, but it is hard to implement + * it right without global locks per-device to protect from auxiliary_drv removal + * during call to that ops. In addition, this implementation lacks proper module + * dependency, which causes to load/unload races between auxiliary parent and devices + * modules. + * + * The most easiest way to provide these ops reliably without needing to + * have a lock is to EXPORT_SYMBOL*() them and rely on already existing + * modules infrastructure for validity and correct dependencies chains. 
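Illustration (not part of this series): the paragraph above recommends exporting parent-provided operations as ordinary module symbols rather than per-device ops pointers. A minimal sketch of that pattern with invented names (the symbol-namespace spelling varies slightly between kernel versions):

	/* in the parent (registering) driver module */
	int foo_core_send(struct auxiliary_device *auxdev, const void *buf, size_t len)
	{
		/* talk to the shared core device owned by the parent */
		return 0;
	}
	EXPORT_SYMBOL_NS_GPL(foo_core_send, FOO_CORE);

	/* in the auxiliary driver module bound to "foo_mod.foo_dev" */
	MODULE_IMPORT_NS(FOO_CORE);

Module dependency and symbol availability are then handled by the normal module infrastructure, which is exactly the property the DOC text above is after.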
+ */ + static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, const struct auxiliary_device *auxdev) { - for (; id->name[0]; id++) { - const char *p = strrchr(dev_name(&auxdev->dev), '.'); - int match_size; + const char *auxdev_name = dev_name(&auxdev->dev); + const char *p = strrchr(auxdev_name, '.'); + int match_size; - if (!p) - continue; - match_size = p - dev_name(&auxdev->dev); + if (!p) + return NULL; + match_size = p - auxdev_name; + for (; id->name[0]; id++) { /* use dev_name(&auxdev->dev) prefix before last '.' char to match to */ if (strlen(id->name) == match_size && - !strncmp(dev_name(&auxdev->dev), id->name, match_size)) + !strncmp(auxdev_name, id->name, match_size)) return id; } return NULL; } -static int auxiliary_match(struct device *dev, struct device_driver *drv) +static int auxiliary_match(struct device *dev, const struct device_driver *drv) { struct auxiliary_device *auxdev = to_auxiliary_dev(dev); - struct auxiliary_driver *auxdrv = to_auxiliary_drv(drv); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(drv); return !!auxiliary_match_id(auxdrv->id_table, auxdev); } -static int auxiliary_uevent(struct device *dev, struct kobj_uevent_env *env) +static int auxiliary_uevent(const struct device *dev, struct kobj_uevent_env *env) { const char *name, *p; @@ -62,38 +214,32 @@ static const struct dev_pm_ops auxiliary_dev_pm_ops = { static int auxiliary_bus_probe(struct device *dev) { - struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); struct auxiliary_device *auxdev = to_auxiliary_dev(dev); int ret; - ret = dev_pm_domain_attach(dev, true); + ret = dev_pm_domain_attach(dev, PD_FLAG_ATTACH_POWER_ON | + PD_FLAG_DETACH_POWER_OFF); if (ret) { dev_warn(dev, "Failed to attach to PM Domain : %d\n", ret); return ret; } - ret = auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev)); - if (ret) - dev_pm_domain_detach(dev, true); - - return ret; + return auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev)); } -static int auxiliary_bus_remove(struct device *dev) +static void auxiliary_bus_remove(struct device *dev) { - struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); struct auxiliary_device *auxdev = to_auxiliary_dev(dev); if (auxdrv->remove) auxdrv->remove(auxdev); - dev_pm_domain_detach(dev, true); - - return 0; } static void auxiliary_bus_shutdown(struct device *dev) { - struct auxiliary_driver *auxdrv = NULL; + const struct auxiliary_driver *auxdrv = NULL; struct auxiliary_device *auxdev; if (dev->driver) { @@ -105,7 +251,7 @@ static void auxiliary_bus_shutdown(struct device *dev) auxdrv->shutdown(auxdev); } -static struct bus_type auxiliary_bus_type = { +static const struct bus_type auxiliary_bus_type = { .name = "auxiliary", .probe = auxiliary_bus_probe, .remove = auxiliary_bus_remove, @@ -119,7 +265,7 @@ static struct bus_type auxiliary_bus_type = { * auxiliary_device_init - check auxiliary_device and initialize * @auxdev: auxiliary device struct * - * This is the first step in the two-step process to register an + * This is the second step in the three-step process to register an * auxiliary_device. 
* * When this function returns an error code, then the device_initialize will @@ -148,6 +294,7 @@ int auxiliary_device_init(struct auxiliary_device *auxdev) dev->bus = &auxiliary_bus_type; device_initialize(&auxdev->dev); + mutex_init(&auxdev->sysfs.lock); return 0; } EXPORT_SYMBOL_GPL(auxiliary_device_init); @@ -157,7 +304,7 @@ EXPORT_SYMBOL_GPL(auxiliary_device_init); * @auxdev: auxiliary bus device to add to the bus * @modname: name of the parent device's driver module * - * This is the second step in the two-step process to register an + * This is the third step in the three-step process to register an * auxiliary_device. * * This function must be called after a successful call to @@ -196,41 +343,21 @@ int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname) EXPORT_SYMBOL_GPL(__auxiliary_device_add); /** - * auxiliary_find_device - auxiliary device iterator for locating a particular device. - * @start: Device to begin with - * @data: Data to pass to match function - * @match: Callback function to check device - * - * This function returns a reference to a device that is 'found' - * for later use, as determined by the @match callback. - * - * The callback should return 0 if the device doesn't match and non-zero - * if it does. If the callback returns non-zero, this function will - * return to the caller and not iterate over any more devices. - */ -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)) -{ - struct device *dev; - - dev = bus_find_device(&auxiliary_bus_type, start, data, match); - if (!dev) - return NULL; - - return to_auxiliary_dev(dev); -} -EXPORT_SYMBOL_GPL(auxiliary_find_device); - -/** * __auxiliary_driver_register - register a driver for auxiliary bus devices * @auxdrv: auxiliary_driver structure * @owner: owning module/driver * @modname: KBUILD_MODNAME for parent driver + * + * The expectation is that users will call the "auxiliary_driver_register" + * macro so that the caller's KBUILD_MODNAME is automatically inserted for the + * modname parameter. Only if a user requires a custom name would this version + * be called directly. */ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, const char *modname) { + int ret; + if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table)) return -EINVAL; @@ -246,7 +373,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, auxdrv->driver.bus = &auxiliary_bus_type; auxdrv->driver.mod_name = modname; - return driver_register(&auxdrv->driver); + ret = driver_register(&auxdrv->driver); + if (ret) + kfree(auxdrv->driver.name); + + return ret; } EXPORT_SYMBOL_GPL(__auxiliary_driver_register); @@ -261,6 +392,116 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv) } EXPORT_SYMBOL_GPL(auxiliary_driver_unregister); +static void auxiliary_device_release(struct device *dev) +{ + struct auxiliary_device *auxdev = to_auxiliary_dev(dev); + + of_node_put(dev->of_node); + kfree(auxdev); +} + +/** + * auxiliary_device_create - create a device on the auxiliary bus + * @dev: parent device + * @modname: module name used to create the auxiliary driver name. + * @devname: auxiliary bus device name + * @platform_data: auxiliary bus device platform data + * @id: auxiliary bus device id + * + * Helper to create an auxiliary bus device. + * The device created matches driver 'modname.devname' on the auxiliary bus. 
+ */ +struct auxiliary_device *auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id) +{ + struct auxiliary_device *auxdev; + int ret; + + auxdev = kzalloc(sizeof(*auxdev), GFP_KERNEL); + if (!auxdev) + return NULL; + + auxdev->id = id; + auxdev->name = devname; + auxdev->dev.parent = dev; + auxdev->dev.platform_data = platform_data; + auxdev->dev.release = auxiliary_device_release; + device_set_of_node_from_dev(&auxdev->dev, dev); + + ret = auxiliary_device_init(auxdev); + if (ret) { + of_node_put(auxdev->dev.of_node); + kfree(auxdev); + return NULL; + } + + ret = __auxiliary_device_add(auxdev, modname); + if (ret) { + /* + * It may look odd but auxdev should not be freed here. + * auxiliary_device_uninit() calls device_put() which call + * the device release function, freeing auxdev. + */ + auxiliary_device_uninit(auxdev); + return NULL; + } + + return auxdev; +} +EXPORT_SYMBOL_GPL(auxiliary_device_create); + +/** + * auxiliary_device_destroy - remove an auxiliary device + * @auxdev: pointer to the auxdev to be removed + * + * Helper to remove an auxiliary device created with + * auxiliary_device_create() + */ +void auxiliary_device_destroy(void *auxdev) +{ + struct auxiliary_device *_auxdev = auxdev; + + auxiliary_device_delete(_auxdev); + auxiliary_device_uninit(_auxdev); +} +EXPORT_SYMBOL_GPL(auxiliary_device_destroy); + +/** + * __devm_auxiliary_device_create - create a managed device on the auxiliary bus + * @dev: parent device + * @modname: module name used to create the auxiliary driver name. + * @devname: auxiliary bus device name + * @platform_data: auxiliary bus device platform data + * @id: auxiliary bus device id + * + * Device managed helper to create an auxiliary bus device. + * The device created matches driver 'modname.devname' on the auxiliary bus. 
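Illustration (not part of this series): a hedged example of a parent driver using the managed helper documented above; the driver, device, and "rdma" names are invented. The leading underscores suggest callers normally go through a convenience wrapper that supplies KBUILD_MODNAME; here it is passed explicitly:

	/* hypothetical parent driver probe() */
	static int foo_probe(struct platform_device *pdev)
	{
		struct auxiliary_device *auxdev;

		auxdev = __devm_auxiliary_device_create(&pdev->dev, KBUILD_MODNAME,
							"rdma", NULL, 0);
		if (!auxdev)
			return -ENODEV;

		/*
		 * An auxiliary_driver matching "<modname>.rdma" can now bind;
		 * teardown is tied to pdev's devres, so no explicit cleanup is needed.
		 */
		return 0;
	}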
+ */ +struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id) +{ + struct auxiliary_device *auxdev; + int ret; + + auxdev = auxiliary_device_create(dev, modname, devname, platform_data, id); + if (!auxdev) + return NULL; + + ret = devm_add_action_or_reset(dev, auxiliary_device_destroy, + auxdev); + if (ret) + return NULL; + + return auxdev; +} +EXPORT_SYMBOL_GPL(__devm_auxiliary_device_create); + void __init auxiliary_bus_init(void) { WARN_ON(bus_register(&auxiliary_bus_type)); diff --git a/drivers/base/auxiliary_sysfs.c b/drivers/base/auxiliary_sysfs.c new file mode 100644 index 000000000000..754f21730afd --- /dev/null +++ b/drivers/base/auxiliary_sysfs.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ + +#include <linux/auxiliary_bus.h> +#include <linux/slab.h> + +#define AUXILIARY_MAX_IRQ_NAME 11 + +struct auxiliary_irq_info { + struct device_attribute sysfs_attr; + char name[AUXILIARY_MAX_IRQ_NAME]; +}; + +static struct attribute *auxiliary_irq_attrs[] = { + NULL +}; + +static const struct attribute_group auxiliary_irqs_group = { + .name = "irqs", + .attrs = auxiliary_irq_attrs, +}; + +static int auxiliary_irq_dir_prepare(struct auxiliary_device *auxdev) +{ + int ret = 0; + + guard(mutex)(&auxdev->sysfs.lock); + if (auxdev->sysfs.irq_dir_exists) + return 0; + + ret = devm_device_add_group(&auxdev->dev, &auxiliary_irqs_group); + if (ret) + return ret; + + auxdev->sysfs.irq_dir_exists = true; + xa_init(&auxdev->sysfs.irqs); + return 0; +} + +/** + * auxiliary_device_sysfs_irq_add - add a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. + * @irq: The associated interrupt number. + * + * This function should be called after auxiliary device have successfully + * received the irq. + * The driver is responsible to add a unique irq for the auxiliary device. The + * driver can invoke this function from multiple thread context safely for + * unique irqs of the auxiliary devices. The driver must not invoke this API + * multiple times if the irq is already added previously. + * + * Return: zero on success or an error code on failure. + */ +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = NULL; + struct device *dev = &auxdev->dev; + int ret; + + ret = auxiliary_irq_dir_prepare(auxdev); + if (ret) + return ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + sysfs_attr_init(&info->sysfs_attr.attr); + snprintf(info->name, AUXILIARY_MAX_IRQ_NAME, "%d", irq); + + ret = xa_insert(&auxdev->sysfs.irqs, irq, info, GFP_KERNEL); + if (ret) + return ret; + + info->sysfs_attr.attr.name = info->name; + ret = sysfs_add_file_to_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + if (ret) + goto sysfs_add_err; + + xa_store(&auxdev->sysfs.irqs, irq, no_free_ptr(info), GFP_KERNEL); + return 0; + +sysfs_add_err: + xa_erase(&auxdev->sysfs.irqs, irq); + return ret; +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_add); + +/** + * auxiliary_device_sysfs_irq_remove - remove a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. + * @irq: the IRQ to remove. + * + * This function should be called to remove an IRQ sysfs entry. + * The driver must invoke this API when IRQ is released by the device. 
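Illustration (not part of this series): the expected calling pattern for the two IRQ sysfs helpers above, seen from the driver that owns the auxiliary device; the function names are invented:

	/* after the auxiliary device has successfully obtained its vector */
	static int foo_expose_irq(struct auxiliary_device *auxdev, int irq)
	{
		/* creates an "irqs/<irq>" sysfs entry under the auxiliary device */
		return auxiliary_device_sysfs_irq_add(auxdev, irq);
	}

	/* must be called when the device releases the IRQ again */
	static void foo_hide_irq(struct auxiliary_device *auxdev, int irq)
	{
		auxiliary_device_sysfs_irq_remove(auxdev, irq);
	}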
+ */ +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = xa_load(&auxdev->sysfs.irqs, irq); + struct device *dev = &auxdev->dev; + + if (!info) { + dev_err(&auxdev->dev, "IRQ %d doesn't exist\n", irq); + return; + } + sysfs_remove_file_from_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + xa_erase(&auxdev->sysfs.irqs, irq); +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_remove); diff --git a/drivers/base/base.h b/drivers/base/base.h index 404db83ee5ec..430cbefbc97f 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -27,11 +27,13 @@ * on this bus. * @bus - pointer back to the struct bus_type that this structure is associated * with. + * @dev_root: Default device to use as the parent. * * @glue_dirs - "glue" directory to put in-between the parent device to * avoid namespace conflicts * @class - pointer back to the struct class that this structure is associated * with. + * @lock_key: Lock class key for use by the lock validator * * This structure is the one that is the actual kobject allowing struct * bus_type/class to be statically allocated safely. Nothing outside of the @@ -48,12 +50,31 @@ struct subsys_private { struct klist klist_drivers; struct blocking_notifier_head bus_notifier; unsigned int drivers_autoprobe:1; - struct bus_type *bus; + const struct bus_type *bus; + struct device *dev_root; struct kset glue_dirs; - struct class *class; + const struct class *class; + + struct lock_class_key lock_key; }; -#define to_subsys_private(obj) container_of(obj, struct subsys_private, subsys.kobj) +#define to_subsys_private(obj) container_of_const(obj, struct subsys_private, subsys.kobj) + +static inline struct subsys_private *subsys_get(struct subsys_private *sp) +{ + if (sp) + kset_get(&sp->subsys); + return sp; +} + +static inline void subsys_put(struct subsys_private *sp) +{ + if (sp) + kset_put(&sp->subsys); +} + +struct subsys_private *bus_to_subsys(const struct bus_type *bus); +struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { struct kobject kobj; @@ -64,6 +85,18 @@ struct driver_private { }; #define to_driver(obj) container_of(obj, struct driver_private, kobj) +#ifdef CONFIG_RUST +/** + * struct driver_type - Representation of a Rust driver type. + */ +struct driver_type { + /** + * @id: Representation of core::any::TypeId. + */ + u8 id[16]; +} __packed; +#endif + /** * struct device_private - structure to hold the private to the driver core portions of the device structure. * @@ -79,6 +112,7 @@ struct driver_private { * @async_driver - pointer to device driver awaiting probe via async_probe * @device - pointer back to the struct device that this structure is * associated with. + * @driver_type - The type of the bound Rust driver. * @dead - This device is currently either in the process of or has been * removed from the system. Any asynchronous events scheduled for this * device should exit without taking any action. 
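A minimal usage sketch of the auxiliary-bus helpers added above (auxiliary_device_create(), __devm_auxiliary_device_create() and the per-IRQ sysfs entries), written from the point of view of a hypothetical parent driver. It is not part of the diff; the my_parent_* names, the platform-data layout, the "rpc" devname and the use of KBUILD_MODNAME as modname are illustrative assumptions.

#include <linux/auxiliary_bus.h>
#include <linux/device.h>

/* Hypothetical per-device data handed to the auxiliary child. */
struct my_parent_pdata {
	void __iomem *regs;
};

static int my_parent_setup_aux(struct device *dev,
			       struct my_parent_pdata *pdata, int irq)
{
	struct auxiliary_device *auxdev;
	int ret;

	/*
	 * Device-managed creation; the new device matches an auxiliary
	 * driver named "<KBUILD_MODNAME>.rpc" and is torn down together
	 * with the parent via auxiliary_device_destroy().
	 */
	auxdev = __devm_auxiliary_device_create(dev, KBUILD_MODNAME, "rpc",
						pdata, 0);
	if (!auxdev)
		return -ENODEV;

	/* Publish the interrupt under the child's sysfs "irqs" group. */
	ret = auxiliary_device_sysfs_irq_add(auxdev, irq);
	if (ret)
		return ret;

	/* Call auxiliary_device_sysfs_irq_remove(auxdev, irq) once the IRQ is released. */
	return 0;
}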
@@ -92,9 +126,12 @@ struct device_private { struct klist_node knode_bus; struct klist_node knode_class; struct list_head deferred_probe; - struct device_driver *async_driver; + const struct device_driver *async_driver; char *deferred_probe_reason; struct device *device; +#ifdef CONFIG_RUST + struct driver_type driver_type; +#endif u8 dead:1; }; #define to_device_private_parent(obj) \ @@ -107,90 +144,137 @@ struct device_private { container_of(obj, struct device_private, knode_class) /* initialisation functions */ -extern int devices_init(void); -extern int buses_init(void); -extern int classes_init(void); -extern int firmware_init(void); +int devices_init(void); +int buses_init(void); +int classes_init(void); +int firmware_init(void); #ifdef CONFIG_SYS_HYPERVISOR -extern int hypervisor_init(void); +int hypervisor_init(void); #else static inline int hypervisor_init(void) { return 0; } #endif -extern int platform_bus_init(void); -extern void cpu_dev_init(void); -extern void container_dev_init(void); +int platform_bus_init(void); +int faux_bus_init(void); +void cpu_dev_init(void); +void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS -extern void auxiliary_bus_init(void); +void auxiliary_bus_init(void); #else static inline void auxiliary_bus_init(void) { } #endif -struct kobject *virtual_device_parent(struct device *dev); +struct kobject *virtual_device_parent(void); -extern int bus_add_device(struct device *dev); -extern void bus_probe_device(struct device *dev); -extern void bus_remove_device(struct device *dev); +int bus_add_device(struct device *dev); +void bus_probe_device(struct device *dev); +void bus_remove_device(struct device *dev); +void bus_notify(struct device *dev, enum bus_notifier_event value); +bool bus_is_registered(const struct bus_type *bus); -extern int bus_add_driver(struct device_driver *drv); -extern void bus_remove_driver(struct device_driver *drv); -extern void device_release_driver_internal(struct device *dev, - struct device_driver *drv, - struct device *parent); +int bus_add_driver(struct device_driver *drv); +void bus_remove_driver(struct device_driver *drv); +void device_release_driver_internal(struct device *dev, const struct device_driver *drv, + struct device *parent); -extern void driver_detach(struct device_driver *drv); -extern void driver_deferred_probe_del(struct device *dev); -extern void device_set_deferred_probe_reason(const struct device *dev, - struct va_format *vaf); -static inline int driver_match_device(struct device_driver *drv, +void driver_detach(const struct device_driver *drv); +void driver_deferred_probe_del(struct device *dev); +void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf); +static inline int driver_match_device(const struct device_driver *drv, struct device *dev) { return drv->bus->match ? 
drv->bus->match(dev, drv) : 1; } -extern bool driver_allows_async_probing(struct device_driver *drv); -extern int driver_add_groups(struct device_driver *drv, - const struct attribute_group **groups); -extern void driver_remove_groups(struct device_driver *drv, - const struct attribute_group **groups); +static inline void dev_sync_state(struct device *dev) +{ + if (dev->bus->sync_state) + dev->bus->sync_state(dev); + else if (dev->driver && dev->driver->sync_state) + dev->driver->sync_state(dev); +} + +int driver_add_groups(const struct device_driver *drv, const struct attribute_group **groups); +void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); -extern char *make_class_name(const char *name, struct kobject *kobj); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + /* + * Majority (all?) read accesses to dev->driver happens either + * while holding device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read. + * However when reading device's uevent file we read driver pointer + * without taking device lock (so we do not block there for + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ + // FIXME - this cast should not be needed "soon" + WRITE_ONCE(dev->driver, (struct device_driver *)drv); +} -extern int devres_release_all(struct device *dev); -extern void device_block_probing(void); -extern void device_unblock_probing(void); +int devres_release_all(struct device *dev); +void device_block_probing(void); +void device_unblock_probing(void); +void deferred_probe_extend_timeout(void); +void driver_deferred_probe_trigger(void); +const char *device_get_devnode(const struct device *dev, umode_t *mode, + kuid_t *uid, kgid_t *gid, const char **tmp); /* /sys/devices directory */ extern struct kset *devices_kset; -extern void devices_kset_move_last(struct device *dev); +void devices_kset_move_last(struct device *dev); #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS) -extern void module_add_driver(struct module *mod, struct device_driver *drv); -extern void module_remove_driver(struct device_driver *drv); +int module_add_driver(struct module *mod, const struct device_driver *drv); +void module_remove_driver(const struct device_driver *drv); #else -static inline void module_add_driver(struct module *mod, - struct device_driver *drv) { } +static inline int module_add_driver(struct module *mod, + struct device_driver *drv) +{ + return 0; +} static inline void module_remove_driver(struct device_driver *drv) { } #endif #ifdef CONFIG_DEVTMPFS -extern int devtmpfs_init(void); +int devtmpfs_init(void); #else static inline int devtmpfs_init(void) { return 0; } #endif +#ifdef CONFIG_BLOCK +extern const struct class block_class; +static inline bool is_blockdev(struct device *dev) +{ + return dev->class == &block_class; +} +#else +static inline bool is_blockdev(struct device *dev) { return false; } +#endif + /* Device links support */ -extern int device_links_read_lock(void); -extern void device_links_read_unlock(int idx); -extern int device_links_read_lock_held(void); -extern int device_links_check_suppliers(struct device *dev); -extern void device_links_force_bind(struct device *dev); -extern void device_links_driver_bound(struct device *dev); -extern void 
device_links_driver_cleanup(struct device *dev); -extern void device_links_no_driver(struct device *dev); -extern bool device_links_busy(struct device *dev); -extern void device_links_unbind_consumers(struct device *dev); -extern void fw_devlink_drivers_done(void); +int device_links_read_lock(void); +void device_links_read_unlock(int idx); +int device_links_read_lock_held(void); +int device_links_check_suppliers(struct device *dev); +void device_links_force_bind(struct device *dev); +void device_links_driver_bound(struct device *dev); +void device_links_driver_cleanup(struct device *dev); +void device_links_no_driver(struct device *dev); +bool device_links_busy(struct device *dev); +void device_links_unbind_consumers(struct device *dev); +bool device_link_flag_is_sync_state_only(u32 flags); +void fw_devlink_drivers_done(void); +void fw_devlink_probing_done(void); + +#define dev_for_each_link_to_supplier(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.suppliers, c_node, \ + device_links_read_lock_held()) + +#define dev_for_each_link_to_consumer(__link, __dev) \ + list_for_each_entry_srcu(__link, &(__dev)->links.consumers, s_node, \ + device_links_read_lock_held()) /* device pm support */ void device_pm_move_to_tail(struct device *dev); @@ -202,3 +286,6 @@ int devtmpfs_delete_node(struct device *dev); static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } #endif + +void software_node_notify(struct device *dev); +void software_node_notify_remove(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 1f6b4bd61056..9eb7771706f0 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -6,6 +6,7 @@ * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc. + * Copyright (c) 2023 Greg Kroah-Hartman <gregkh@linuxfoundation.org> */ #include <linux/async.h> @@ -24,6 +25,9 @@ /* /sys/devices/system */ static struct kset *system_kset; +/* /sys/bus */ +static struct kset *bus_kset; + #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) /* @@ -39,19 +43,63 @@ static struct kset *system_kset; static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); -static struct bus_type *bus_get(struct bus_type *bus) +/** + * bus_to_subsys - Turn a struct bus_type into a struct subsys_private + * + * @bus: pointer to the struct bus_type to look up + * + * The driver core internals needs to work on the subsys_private structure, not + * the external struct bus_type pointer. This function walks the list of + * registered busses in the system and finds the matching one and returns the + * internal struct subsys_private that relates to that bus. + * + * Note, the reference count of the return value is INCREMENTED if it is not + * NULL. A call to subsys_put() must be done when finished with the pointer in + * order for it to be properly freed. 
+ */ +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { - if (bus) { - kset_get(&bus->p->subsys); - return bus; + struct subsys_private *sp = NULL; + struct kobject *kobj; + + if (!bus || !bus_kset) + return NULL; + + spin_lock(&bus_kset->list_lock); + + if (list_empty(&bus_kset->list)) + goto done; + + list_for_each_entry(kobj, &bus_kset->list, entry) { + struct kset *kset = container_of(kobj, struct kset, kobj); + + sp = container_of_const(kset, struct subsys_private, subsys); + if (sp->bus == bus) + goto done; } + sp = NULL; +done: + sp = subsys_get(sp); + spin_unlock(&bus_kset->list_lock); + return sp; +} + +static const struct bus_type *bus_get(const struct bus_type *bus) +{ + struct subsys_private *sp = bus_to_subsys(bus); + + if (sp) + return bus; return NULL; } -static void bus_put(struct bus_type *bus) +static void bus_put(const struct bus_type *bus) { - if (bus) - kset_put(&bus->p->subsys); + struct subsys_private *sp = bus_to_subsys(bus); + + /* two puts are required as the call to bus_to_subsys incremented it again */ + subsys_put(sp); + subsys_put(sp); } static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr, @@ -91,7 +139,7 @@ static void driver_release(struct kobject *kobj) kfree(drv_priv); } -static struct kobj_type driver_ktype = { +static const struct kobj_type driver_ktype = { .sysfs_ops = &driver_sysfs_ops, .release = driver_release, }; @@ -104,7 +152,8 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for reading a bus attribute without show() */ + ssize_t ret = -EIO; if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); @@ -116,7 +165,8 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); - ssize_t ret = 0; + /* return -EIO for writing a bus attribute without store() */ + ssize_t ret = -EIO; if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count); @@ -128,44 +178,49 @@ static const struct sysfs_ops bus_sysfs_ops = { .store = bus_attr_store, }; -int bus_create_file(struct bus_type *bus, struct bus_attribute *attr) +int bus_create_file(const struct bus_type *bus, struct bus_attribute *attr) { + struct subsys_private *sp = bus_to_subsys(bus); int error; - if (bus_get(bus)) { - error = sysfs_create_file(&bus->p->subsys.kobj, &attr->attr); - bus_put(bus); - } else - error = -EINVAL; + + if (!sp) + return -EINVAL; + + error = sysfs_create_file(&sp->subsys.kobj, &attr->attr); + + subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_create_file); -void bus_remove_file(struct bus_type *bus, struct bus_attribute *attr) +void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr) { - if (bus_get(bus)) { - sysfs_remove_file(&bus->p->subsys.kobj, &attr->attr); - bus_put(bus); - } + struct subsys_private *sp = bus_to_subsys(bus); + + if (!sp) + return; + + sysfs_remove_file(&sp->subsys.kobj, &attr->attr); + subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_remove_file); static void bus_release(struct kobject *kobj) { struct subsys_private *priv = to_subsys_private(kobj); - struct bus_type *bus = priv->bus; + lockdep_unregister_key(&priv->lock_key); kfree(priv); - bus->p = NULL; } -static struct kobj_type bus_ktype = { +static const struct kobj_type bus_ktype = { .sysfs_ops = &bus_sysfs_ops, 
.release = bus_release, }; -static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) +static int bus_uevent_filter(const struct kobject *kobj) { - struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &bus_ktype) return 1; @@ -176,13 +231,11 @@ static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; -static struct kset *bus_kset; - /* Manually detach a device from its associated driver. */ static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) { - struct bus_type *bus = bus_get(drv->bus); + const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; @@ -195,7 +248,7 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store); +static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); /* * Manually attach a device to a driver. @@ -205,7 +258,7 @@ static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, S_IWUSR, NULL, unbind_store); static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) { - struct bus_type *bus = bus_get(drv->bus); + const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; @@ -221,24 +274,39 @@ static ssize_t bind_store(struct device_driver *drv, const char *buf, bus_put(bus); return err; } -static DRIVER_ATTR_IGNORE_LOCKDEP(bind, S_IWUSR, NULL, bind_store); +static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); -static ssize_t drivers_autoprobe_show(struct bus_type *bus, char *buf) +static ssize_t drivers_autoprobe_show(const struct bus_type *bus, char *buf) { - return sysfs_emit(buf, "%d\n", bus->p->drivers_autoprobe); + struct subsys_private *sp = bus_to_subsys(bus); + int ret; + + if (!sp) + return -EINVAL; + + ret = sysfs_emit(buf, "%d\n", sp->drivers_autoprobe); + subsys_put(sp); + return ret; } -static ssize_t drivers_autoprobe_store(struct bus_type *bus, +static ssize_t drivers_autoprobe_store(const struct bus_type *bus, const char *buf, size_t count) { + struct subsys_private *sp = bus_to_subsys(bus); + + if (!sp) + return -EINVAL; + if (buf[0] == '0') - bus->p->drivers_autoprobe = 0; + sp->drivers_autoprobe = 0; else - bus->p->drivers_autoprobe = 1; + sp->drivers_autoprobe = 1; + + subsys_put(sp); return count; } -static ssize_t drivers_probe_store(struct bus_type *bus, +static ssize_t drivers_probe_store(const struct bus_type *bus, const char *buf, size_t count) { struct device *dev; @@ -266,6 +334,19 @@ static struct device *next_device(struct klist_iter *i) return dev; } +static struct device *prev_device(struct klist_iter *i) +{ + struct klist_node *n = klist_prev(i); + struct device *dev = NULL; + struct device_private *dev_prv; + + if (n) { + dev_prv = to_device_private_bus(n); + dev = dev_prv->device; + } + return dev; +} + /** * bus_for_each_dev - device iterator. * @bus: bus type. @@ -285,21 +366,23 @@ static struct device *next_device(struct klist_iter *i) * to retain this data, it should do so, and increment the reference * count in the supplied callback. 
*/ -int bus_for_each_dev(struct bus_type *bus, struct device *start, - void *data, int (*fn)(struct device *, void *)) +int bus_for_each_dev(const struct bus_type *bus, struct device *start, + void *data, device_iter_t fn) { + struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; int error = 0; - if (!bus || !bus->p) + if (!sp) return -EINVAL; - klist_iter_init_node(&bus->p->klist_devices, &i, + klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); + subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); @@ -319,66 +402,55 @@ EXPORT_SYMBOL_GPL(bus_for_each_dev); * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ -struct device *bus_find_device(struct bus_type *bus, +struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)) + device_match_t match) { + struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; - if (!bus || !bus->p) + if (!sp) return NULL; - klist_iter_init_node(&bus->p->klist_devices, &i, + klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); + subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); -/** - * subsys_find_device_by_id - find a device with a specific enumeration number - * @subsys: subsystem - * @id: index 'id' in struct device - * @hint: device to check first - * - * Check the hint's next object and if it is a match return it directly, - * otherwise, fall back to a full list search. Either way a reference for - * the returned object is taken. - */ -struct device *subsys_find_device_by_id(struct bus_type *subsys, unsigned int id, - struct device *hint) +struct device *bus_find_device_reverse(const struct bus_type *bus, + struct device *start, const void *data, + device_match_t match) { + struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; - if (!subsys) + if (!sp) return NULL; - if (hint) { - klist_iter_init_node(&subsys->p->klist_devices, &i, &hint->p->knode_bus); - dev = next_device(&i); - if (dev && dev->id == id && get_device(dev)) { - klist_iter_exit(&i); - return dev; - } - klist_iter_exit(&i); - } - - klist_iter_init_node(&subsys->p->klist_devices, &i, NULL); - while ((dev = next_device(&i))) { - if (dev->id == id && get_device(dev)) { - klist_iter_exit(&i); - return dev; + klist_iter_init_node(&sp->klist_devices, &i, + (start ? &start->p->knode_bus : NULL)); + while ((dev = prev_device(&i))) { + if (match(dev, data)) { + get_device(dev); + break; } } klist_iter_exit(&i); - return NULL; + subsys_put(sp); + return dev; } -EXPORT_SYMBOL_GPL(subsys_find_device_by_id); +EXPORT_SYMBOL_GPL(bus_find_device_reverse); static struct device_driver *next_driver(struct klist_iter *i) { @@ -411,21 +483,23 @@ static struct device_driver *next_driver(struct klist_iter *i) * in the callback. It must also be sure to increment the refcount * so it doesn't disappear before returning to the caller. 
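For context, a sketch of how the reworked bus_find_device() and the device_match_t callback type are typically consumed. It is not part of the diff; the platform bus and the match-by-name policy are assumptions made purely for illustration.

#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/string.h>

/* device_match_t callback: non-zero means "this is the device we want". */
static int match_by_name(struct device *dev, const void *want)
{
	return sysfs_streq(dev_name(dev), want);
}

static struct device *find_platform_dev(const char *name)
{
	/*
	 * bus_find_device() takes a reference on the device it returns;
	 * the caller must drop it with put_device() when done.
	 */
	return bus_find_device(&platform_bus_type, NULL, name, match_by_name);
}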
*/ -int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, +int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)) { + struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device_driver *drv; int error = 0; - if (!bus) + if (!sp) return -EINVAL; - klist_iter_init_node(&bus->p->klist_drivers, &i, + klist_iter_init_node(&sp->klist_drivers, &i, start ? &start->p->knode_bus : NULL); while ((drv = next_driver(&i)) && !error) error = fn(drv, data); klist_iter_exit(&i); + subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); @@ -440,32 +514,46 @@ EXPORT_SYMBOL_GPL(bus_for_each_drv); */ int bus_add_device(struct device *dev) { - struct bus_type *bus = bus_get(dev->bus); - int error = 0; + struct subsys_private *sp = bus_to_subsys(dev->bus); + int error; - if (bus) { - pr_debug("bus: '%s': add device %s\n", bus->name, dev_name(dev)); - error = device_add_groups(dev, bus->dev_groups); - if (error) - goto out_put; - error = sysfs_create_link(&bus->p->devices_kset->kobj, - &dev->kobj, dev_name(dev)); - if (error) - goto out_groups; - error = sysfs_create_link(&dev->kobj, - &dev->bus->p->subsys.kobj, "subsystem"); - if (error) - goto out_subsys; - klist_add_tail(&dev->p->knode_bus, &bus->p->klist_devices); + if (!sp) { + /* + * This is a normal operation for many devices that do not + * have a bus assigned to them, just say that all went + * well. + */ + return 0; } + + /* + * Reference in sp is now incremented and will be dropped when + * the device is removed from the bus + */ + + pr_debug("bus: '%s': add device %s\n", sp->bus->name, dev_name(dev)); + + error = device_add_groups(dev, sp->bus->dev_groups); + if (error) + goto out_put; + + error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); + if (error) + goto out_groups; + + error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); + if (error) + goto out_subsys; + + klist_add_tail(&dev->p->knode_bus, &sp->klist_devices); return 0; out_subsys: - sysfs_remove_link(&bus->p->devices_kset->kobj, dev_name(dev)); + sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); out_groups: - device_remove_groups(dev, bus->dev_groups); + device_remove_groups(dev, sp->bus->dev_groups); out_put: - bus_put(dev->bus); + subsys_put(sp); return error; } @@ -477,20 +565,20 @@ out_put: */ void bus_probe_device(struct device *dev) { - struct bus_type *bus = dev->bus; + struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; - if (!bus) + if (!sp) return; - if (bus->p->drivers_autoprobe) - device_initial_probe(dev); + device_initial_probe(dev); - mutex_lock(&bus->p->mutex); - list_for_each_entry(sif, &bus->p->interfaces, node) + mutex_lock(&sp->mutex); + list_for_each_entry(sif, &sp->interfaces, node) if (sif->add_dev) sif->add_dev(dev, sif); - mutex_unlock(&bus->p->mutex); + mutex_unlock(&sp->mutex); + subsys_put(sp); } /** @@ -505,21 +593,20 @@ void bus_probe_device(struct device *dev) */ void bus_remove_device(struct device *dev) { - struct bus_type *bus = dev->bus; + struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; - if (!bus) + if (!sp) return; - mutex_lock(&bus->p->mutex); - list_for_each_entry(sif, &bus->p->interfaces, node) + mutex_lock(&sp->mutex); + list_for_each_entry(sif, &sp->interfaces, node) if (sif->remove_dev) sif->remove_dev(dev, sif); - mutex_unlock(&bus->p->mutex); + mutex_unlock(&sp->mutex); sysfs_remove_link(&dev->kobj, 
"subsystem"); - sysfs_remove_link(&dev->bus->p->devices_kset->kobj, - dev_name(dev)); + sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); @@ -527,7 +614,14 @@ void bus_remove_device(struct device *dev) pr_debug("bus: '%s': remove device %s\n", dev->bus->name, dev_name(dev)); device_release_driver(dev); - bus_put(dev->bus); + + /* + * Decrement the reference count twice, once for the bus_to_subsys() + * call in the start of this function, and the second one from the + * reference increment in bus_add_device() + */ + subsys_put(sp); + subsys_put(sp); } static int __must_check add_bind_files(struct device_driver *drv) @@ -552,7 +646,7 @@ static void remove_bind_files(struct device_driver *drv) static BUS_ATTR_WO(drivers_probe); static BUS_ATTR_RW(drivers_autoprobe); -static int add_probe_files(struct bus_type *bus) +static int add_probe_files(const struct bus_type *bus) { int retval; @@ -567,7 +661,7 @@ out: return retval; } -static void remove_probe_files(struct bus_type *bus) +static void remove_probe_files(const struct bus_type *bus) { bus_remove_file(bus, &bus_attr_drivers_autoprobe); bus_remove_file(bus, &bus_attr_drivers_probe); @@ -589,15 +683,18 @@ static DRIVER_ATTR_WO(uevent); */ int bus_add_driver(struct device_driver *drv) { - struct bus_type *bus; + struct subsys_private *sp = bus_to_subsys(drv->bus); struct driver_private *priv; int error = 0; - bus = bus_get(drv->bus); - if (!bus) + if (!sp) return -EINVAL; - pr_debug("bus: '%s': add driver %s\n", bus->name, drv->name); + /* + * Reference in sp is now incremented and will be dropped when + * the driver is removed from the bus + */ + pr_debug("bus: '%s': add driver %s\n", sp->bus->name, drv->name); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { @@ -607,26 +704,31 @@ int bus_add_driver(struct device_driver *drv) klist_init(&priv->klist_devices, NULL, NULL); priv->driver = drv; drv->p = priv; - priv->kobj.kset = bus->p->drivers_kset; + priv->kobj.kset = sp->drivers_kset; error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name); if (error) goto out_unregister; - klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers); - if (drv->bus->p->drivers_autoprobe) { + klist_add_tail(&priv->knode_bus, &sp->klist_drivers); + if (sp->drivers_autoprobe) { error = driver_attach(drv); if (error) - goto out_unregister; + goto out_del_list; + } + error = module_add_driver(drv->owner, drv); + if (error) { + printk(KERN_ERR "%s: failed to create module links for %s\n", + __func__, drv->name); + goto out_detach; } - module_add_driver(drv->owner, drv); error = driver_create_file(drv, &driver_attr_uevent); if (error) { printk(KERN_ERR "%s: uevent attr (%s) failed\n", __func__, drv->name); } - error = driver_add_groups(drv, bus->drv_groups); + error = driver_add_groups(drv, sp->bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? 
Give up */ printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", @@ -644,12 +746,16 @@ int bus_add_driver(struct device_driver *drv) return 0; +out_detach: + driver_detach(drv); +out_del_list: + klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: - bus_put(bus); + subsys_put(sp); return error; } @@ -663,19 +769,29 @@ out_put_bus: */ void bus_remove_driver(struct device_driver *drv) { - if (!drv->bus) + struct subsys_private *sp = bus_to_subsys(drv->bus); + + if (!sp) return; + pr_debug("bus: '%s': remove driver %s\n", sp->bus->name, drv->name); + if (!drv->suppress_bind_attrs) remove_bind_files(drv); - driver_remove_groups(drv, drv->bus->drv_groups); + driver_remove_groups(drv, sp->bus->drv_groups); driver_remove_file(drv, &driver_attr_uevent); klist_remove(&drv->p->knode_bus); - pr_debug("bus: '%s': remove driver %s\n", drv->bus->name, drv->name); driver_detach(drv); module_remove_driver(drv); kobject_put(&drv->p->kobj); - bus_put(drv->bus); + + /* + * Decrement the reference count twice, once for the bus_to_subsys() + * call in the start of this function, and the second one from the + * reference increment in bus_add_driver() + */ + subsys_put(sp); + subsys_put(sp); } /* Helper for bus_rescan_devices's iter */ @@ -702,7 +818,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ -int bus_rescan_devices(struct bus_type *bus) +int bus_rescan_devices(const struct bus_type *bus) { return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } @@ -725,18 +841,6 @@ int device_reprobe(struct device *dev) } EXPORT_SYMBOL_GPL(device_reprobe); -static int bus_add_groups(struct bus_type *bus, - const struct attribute_group **groups) -{ - return sysfs_create_groups(&bus->p->subsys.kobj, groups); -} - -static void bus_remove_groups(struct bus_type *bus, - const struct attribute_group **groups) -{ - sysfs_remove_groups(&bus->p->subsys.kobj, groups); -} - static void klist_devices_get(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); @@ -753,13 +857,21 @@ static void klist_devices_put(struct klist_node *n) put_device(dev); } -static ssize_t bus_uevent_store(struct bus_type *bus, +static ssize_t bus_uevent_store(const struct bus_type *bus, const char *buf, size_t count) { - int rc; + struct subsys_private *sp = bus_to_subsys(bus); + int ret; - rc = kobject_synth_uevent(&bus->p->subsys.kobj, buf, count); - return rc ? rc : count; + if (!sp) + return -EINVAL; + + ret = kobject_synth_uevent(&sp->subsys.kobj, buf, count); + subsys_put(sp); + + if (ret) + return ret; + return count; } /* * "open code" the old BUS_ATTR() macro here. We want to use BUS_ATTR_WO() @@ -767,7 +879,7 @@ static ssize_t bus_uevent_store(struct bus_type *bus, * DEVICE_ATTR_WO(uevent), which would cause a clash with the with the store * function name. */ -static struct bus_attribute bus_attr_uevent = __ATTR(uevent, S_IWUSR, NULL, +static struct bus_attribute bus_attr_uevent = __ATTR(uevent, 0200, NULL, bus_uevent_store); /** @@ -778,27 +890,28 @@ static struct bus_attribute bus_attr_uevent = __ATTR(uevent, S_IWUSR, NULL, * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem. 
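Also for context: a hypothetical, minimal bus module using the constified bus_register()/bus_unregister() prototypes above. The "my_bus" name and the match-by-name binding policy are illustrative only; the const-qualified ->match callback follows the driver_match_device() prototype visible earlier in this diff.

#include <linux/device.h>
#include <linux/module.h>
#include <linux/string.h>

static int my_bus_match(struct device *dev, const struct device_driver *drv)
{
	/* Toy binding policy: device name must equal driver name. */
	return !strcmp(dev_name(dev), drv->name);
}

static const struct bus_type my_bus_type = {
	.name	= "my_bus",
	.match	= my_bus_match,
};

static int __init my_bus_init(void)
{
	return bus_register(&my_bus_type);
}

static void __exit my_bus_exit(void)
{
	bus_unregister(&my_bus_type);
}

module_init(my_bus_init);
module_exit(my_bus_exit);
MODULE_DESCRIPTION("Illustrative bus registration sketch");
MODULE_LICENSE("GPL");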
*/ -int bus_register(struct bus_type *bus) +int bus_register(const struct bus_type *bus) { int retval; struct subsys_private *priv; - struct lock_class_key *key = &bus->lock_key; + struct kobject *bus_kobj; + struct lock_class_key *key; priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->bus = bus; - bus->p = priv; BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); - retval = kobject_set_name(&priv->subsys.kobj, "%s", bus->name); + bus_kobj = &priv->subsys.kobj; + retval = kobject_set_name(bus_kobj, "%s", bus->name); if (retval) goto out; - priv->subsys.kobj.kset = bus_kset; - priv->subsys.kobj.ktype = &bus_ktype; + bus_kobj->kset = bus_kset; + bus_kobj->ktype = &bus_ktype; priv->drivers_autoprobe = 1; retval = kset_register(&priv->subsys); @@ -809,21 +922,21 @@ int bus_register(struct bus_type *bus) if (retval) goto bus_uevent_fail; - priv->devices_kset = kset_create_and_add("devices", NULL, - &priv->subsys.kobj); + priv->devices_kset = kset_create_and_add("devices", NULL, bus_kobj); if (!priv->devices_kset) { retval = -ENOMEM; goto bus_devices_fail; } - priv->drivers_kset = kset_create_and_add("drivers", NULL, - &priv->subsys.kobj); + priv->drivers_kset = kset_create_and_add("drivers", NULL, bus_kobj); if (!priv->drivers_kset) { retval = -ENOMEM; goto bus_drivers_fail; } INIT_LIST_HEAD(&priv->interfaces); + key = &priv->lock_key; + lockdep_register_key(key); __mutex_init(&priv->mutex, "subsys mutex", key); klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put); klist_init(&priv->klist_drivers, NULL, NULL); @@ -832,7 +945,7 @@ int bus_register(struct bus_type *bus) if (retval) goto bus_probe_files_fail; - retval = bus_add_groups(bus, bus->bus_groups); + retval = sysfs_create_groups(bus_kobj, bus->bus_groups); if (retval) goto bus_groups_fail; @@ -842,16 +955,17 @@ int bus_register(struct bus_type *bus) bus_groups_fail: remove_probe_files(bus); bus_probe_files_fail: - kset_unregister(bus->p->drivers_kset); + kset_unregister(priv->drivers_kset); bus_drivers_fail: - kset_unregister(bus->p->devices_kset); + kset_unregister(priv->devices_kset); bus_devices_fail: bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: - kset_unregister(&bus->p->subsys); + kset_unregister(&priv->subsys); + /* Above kset_unregister() will kfree @priv */ + priv = NULL; out: - kfree(bus->p); - bus->p = NULL; + kfree(priv); return retval; } EXPORT_SYMBOL_GPL(bus_register); @@ -863,43 +977,82 @@ EXPORT_SYMBOL_GPL(bus_register); * Unregister the child subsystems and the bus itself. 
* Finally, we call bus_put() to release the refcount */ -void bus_unregister(struct bus_type *bus) +void bus_unregister(const struct bus_type *bus) { + struct subsys_private *sp = bus_to_subsys(bus); + struct kobject *bus_kobj; + + if (!sp) + return; + pr_debug("bus: '%s': unregistering\n", bus->name); - if (bus->dev_root) - device_unregister(bus->dev_root); - bus_remove_groups(bus, bus->bus_groups); + if (sp->dev_root) + device_unregister(sp->dev_root); + + bus_kobj = &sp->subsys.kobj; + sysfs_remove_groups(bus_kobj, bus->bus_groups); remove_probe_files(bus); - kset_unregister(bus->p->drivers_kset); - kset_unregister(bus->p->devices_kset); bus_remove_file(bus, &bus_attr_uevent); - kset_unregister(&bus->p->subsys); + + kset_unregister(sp->drivers_kset); + kset_unregister(sp->devices_kset); + kset_unregister(&sp->subsys); + subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_unregister); -int bus_register_notifier(struct bus_type *bus, struct notifier_block *nb) +int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb) { - return blocking_notifier_chain_register(&bus->p->bus_notifier, nb); + struct subsys_private *sp = bus_to_subsys(bus); + int retval; + + if (!sp) + return -EINVAL; + + retval = blocking_notifier_chain_register(&sp->bus_notifier, nb); + subsys_put(sp); + return retval; } EXPORT_SYMBOL_GPL(bus_register_notifier); -int bus_unregister_notifier(struct bus_type *bus, struct notifier_block *nb) +int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb) { - return blocking_notifier_chain_unregister(&bus->p->bus_notifier, nb); + struct subsys_private *sp = bus_to_subsys(bus); + int retval; + + if (!sp) + return -EINVAL; + retval = blocking_notifier_chain_unregister(&sp->bus_notifier, nb); + subsys_put(sp); + return retval; } EXPORT_SYMBOL_GPL(bus_unregister_notifier); -struct kset *bus_get_kset(struct bus_type *bus) +void bus_notify(struct device *dev, enum bus_notifier_event value) { - return &bus->p->subsys; + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (!sp) + return; + + blocking_notifier_call_chain(&sp->bus_notifier, value, dev); + subsys_put(sp); } -EXPORT_SYMBOL_GPL(bus_get_kset); -struct klist *bus_get_device_klist(struct bus_type *bus) +struct kset *bus_get_kset(const struct bus_type *bus) { - return &bus->p->klist_devices; + struct subsys_private *sp = bus_to_subsys(bus); + struct kset *kset; + + if (!sp) + return NULL; + + kset = &sp->subsys; + subsys_put(sp); + + return kset; } -EXPORT_SYMBOL_GPL(bus_get_device_klist); +EXPORT_SYMBOL_GPL(bus_get_kset); /* * Yes, this forcibly breaks the klist abstraction temporarily. 
It @@ -928,17 +1081,20 @@ static void device_insertion_sort_klist(struct device *a, struct list_head *list list_move_tail(&a->p->knode_bus.n_node, list); } -void bus_sort_breadthfirst(struct bus_type *bus, +void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { + struct subsys_private *sp = bus_to_subsys(bus); LIST_HEAD(sorted_devices); struct klist_node *n, *tmp; struct device_private *dev_prv; struct device *dev; struct klist *device_klist; - device_klist = bus_get_device_klist(bus); + if (!sp) + return; + device_klist = &sp->klist_devices; spin_lock(&device_klist->k_lock); list_for_each_entry_safe(n, tmp, &device_klist->k_list, n_node) { @@ -948,13 +1104,19 @@ void bus_sort_breadthfirst(struct bus_type *bus, } list_splice(&sorted_devices, &device_klist->k_list); spin_unlock(&device_klist->k_lock); + subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); +struct subsys_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; + /** * subsys_dev_iter_init - initialize subsys device iterator * @iter: subsys iterator to initialize - * @subsys: the subsys we wanna iterate over + * @sp: the subsys private (i.e. bus) we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * @@ -963,17 +1125,16 @@ EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ -void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, - struct device *start, const struct device_type *type) +static void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct subsys_private *sp, + struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; if (start) start_knode = &start->p->knode_bus; - klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, start_knode); + klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; } -EXPORT_SYMBOL_GPL(subsys_dev_iter_init); /** * subsys_dev_iter_next - iterate to the next device @@ -987,7 +1148,7 @@ EXPORT_SYMBOL_GPL(subsys_dev_iter_init); * free to do whatever it wants to do with the device including * calling back into subsys code. */ -struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) +static struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) { struct klist_node *knode; struct device *dev; @@ -1001,7 +1162,6 @@ struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) return dev; } } -EXPORT_SYMBOL_GPL(subsys_dev_iter_next); /** * subsys_dev_iter_exit - finish iteration @@ -1010,34 +1170,38 @@ EXPORT_SYMBOL_GPL(subsys_dev_iter_next); * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. 
*/ -void subsys_dev_iter_exit(struct subsys_dev_iter *iter) +static void subsys_dev_iter_exit(struct subsys_dev_iter *iter) { klist_iter_exit(&iter->ki); } -EXPORT_SYMBOL_GPL(subsys_dev_iter_exit); int subsys_interface_register(struct subsys_interface *sif) { - struct bus_type *subsys; + struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return -ENODEV; - subsys = bus_get(sif->subsys); - if (!subsys) + sp = bus_to_subsys(sif->subsys); + if (!sp) return -EINVAL; - mutex_lock(&subsys->p->mutex); - list_add_tail(&sif->node, &subsys->p->interfaces); + /* + * Reference in sp is now incremented and will be dropped when + * the interface is removed from the bus + */ + + mutex_lock(&sp->mutex); + list_add_tail(&sif->node, &sp->interfaces); if (sif->add_dev) { - subsys_dev_iter_init(&iter, subsys, NULL, NULL); + subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->add_dev(dev, sif); subsys_dev_iter_exit(&iter); } - mutex_unlock(&subsys->p->mutex); + mutex_unlock(&sp->mutex); return 0; } @@ -1045,26 +1209,34 @@ EXPORT_SYMBOL_GPL(subsys_interface_register); void subsys_interface_unregister(struct subsys_interface *sif) { - struct bus_type *subsys; + struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return; - subsys = sif->subsys; + sp = bus_to_subsys(sif->subsys); + if (!sp) + return; - mutex_lock(&subsys->p->mutex); + mutex_lock(&sp->mutex); list_del_init(&sif->node); if (sif->remove_dev) { - subsys_dev_iter_init(&iter, subsys, NULL, NULL); + subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->remove_dev(dev, sif); subsys_dev_iter_exit(&iter); } - mutex_unlock(&subsys->p->mutex); - - bus_put(subsys); + mutex_unlock(&sp->mutex); + + /* + * Decrement the reference count twice, once for the bus_to_subsys() + * call in the start of this function, and the second one from the + * reference increment in subsys_interface_register() + */ + subsys_put(sp); + subsys_put(sp); } EXPORT_SYMBOL_GPL(subsys_interface_unregister); @@ -1073,10 +1245,11 @@ static void system_root_device_release(struct device *dev) kfree(dev); } -static int subsys_register(struct bus_type *subsys, +static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { + struct subsys_private *sp; struct device *dev; int err; @@ -1084,6 +1257,12 @@ static int subsys_register(struct bus_type *subsys, if (err < 0) return err; + sp = bus_to_subsys(subsys); + if (!sp) { + err = -EINVAL; + goto err_sp; + } + dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!dev) { err = -ENOMEM; @@ -1102,7 +1281,8 @@ static int subsys_register(struct bus_type *subsys, if (err < 0) goto err_dev_reg; - subsys->dev_root = dev; + sp->dev_root = dev; + subsys_put(sp); return 0; err_dev_reg: @@ -1111,6 +1291,8 @@ err_dev_reg: err_name: kfree(dev); err_dev: + subsys_put(sp); +err_sp: bus_unregister(subsys); return err; } @@ -1133,7 +1315,7 @@ err_dev: * directory itself and not some create fake root-device placed in * /sys/devices/system/<name>. 
*/ -int subsys_system_register(struct bus_type *subsys, +int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); @@ -1146,17 +1328,17 @@ EXPORT_SYMBOL_GPL(subsys_system_register); * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/<name> root device - * with the name of the subystem. The root device can carry subsystem-wide + * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. */ -int subsys_virtual_register(struct bus_type *subsys, +int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; - virtual_dir = virtual_device_parent(NULL); + virtual_dir = virtual_device_parent(); if (!virtual_dir) return -ENOMEM; @@ -1164,6 +1346,82 @@ int subsys_virtual_register(struct bus_type *subsys, } EXPORT_SYMBOL_GPL(subsys_virtual_register); +/** + * driver_find - locate driver on a bus by its name. + * @name: name of the driver. + * @bus: bus to scan for the driver. + * + * Call kset_find_obj() to iterate over list of drivers on + * a bus to find driver by name. Return driver if found. + * + * This routine provides no locking to prevent the driver it returns + * from being unregistered or unloaded while the caller is using it. + * The caller is responsible for preventing this. + */ +struct device_driver *driver_find(const char *name, const struct bus_type *bus) +{ + struct subsys_private *sp = bus_to_subsys(bus); + struct kobject *k; + struct driver_private *priv; + + if (!sp) + return NULL; + + k = kset_find_obj(sp->drivers_kset, name); + subsys_put(sp); + if (!k) + return NULL; + + priv = to_driver(k); + + /* Drop reference added by kset_find_obj() */ + kobject_put(k); + return priv->driver; +} +EXPORT_SYMBOL_GPL(driver_find); + +/* + * Warning, the value could go to "removed" instantly after calling this function, so be very + * careful when calling it... + */ +bool bus_is_registered(const struct bus_type *bus) +{ + struct subsys_private *sp = bus_to_subsys(bus); + bool is_initialized = false; + + if (sp) { + is_initialized = true; + subsys_put(sp); + } + return is_initialized; +} + +/** + * bus_get_dev_root - return a pointer to the "device root" of a bus + * @bus: bus to return the device root of. + * + * If a bus has a "device root" structure, return it, WITH THE REFERENCE + * COUNT INCREMENTED. + * + * Note, when finished with the device, a call to put_device() is required. + * + * If the device root is not present (or bus is not a valid pointer), NULL + * will be returned. 
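A brief sketch of the calling convention for the new bus_get_dev_root() helper documented above; the reporting function and its dev_info() message are illustrative assumptions, and the get/put pairing is the part that matters.

#include <linux/device.h>

static void report_bus_root(const struct bus_type *bus)
{
	/* Reference count of the returned root device is incremented. */
	struct device *root = bus_get_dev_root(bus);

	if (!root)
		return;

	dev_info(root, "root device of bus '%s'\n", bus->name);
	put_device(root);	/* drop the reference taken above */
}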
+ */ +struct device *bus_get_dev_root(const struct bus_type *bus) +{ + struct subsys_private *sp = bus_to_subsys(bus); + struct device *dev_root; + + if (!sp) + return NULL; + + dev_root = get_device(sp->dev_root); + subsys_put(sp); + return dev_root; +} +EXPORT_SYMBOL_GPL(bus_get_dev_root); + int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); @@ -1171,8 +1429,13 @@ int __init buses_init(void) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); - if (!system_kset) + if (!system_kset) { + /* Do error handling here as devices_init() do */ + kset_unregister(bus_kset); + bus_kset = NULL; + pr_err("%s: failed to create and add kset 'bus'\n", __func__); return -ENOMEM; + } return 0; } diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index bfc095956dd1..613410705a47 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/acpi.h> +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/cacheinfo.h> #include <linux/compiler.h> @@ -25,19 +26,66 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo); #define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu)) #define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves) #define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list) +#define per_cpu_cacheinfo_idx(cpu, idx) \ + (per_cpu_cacheinfo(cpu) + (idx)) + +/* Set if no cache information is found in DT/ACPI. */ +static bool use_arch_info; struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) { return ci_cacheinfo(cpu); } -#ifdef CONFIG_OF static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { + /* + * For non DT/ACPI systems, assume unique level 1 caches, + * system-wide shared caches for all other levels. 
+ */ + if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)) || + use_arch_info) + return (this_leaf->level != 1) && (sib_leaf->level != 1); + + if ((sib_leaf->attributes & CACHE_ID) && + (this_leaf->attributes & CACHE_ID)) + return sib_leaf->id == this_leaf->id; + return sib_leaf->fw_token == this_leaf->fw_token; } +bool last_level_cache_is_valid(unsigned int cpu) +{ + struct cacheinfo *llc; + + if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu)) + return false; + + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); + + return (llc->attributes & CACHE_ID) || !!llc->fw_token; + +} + +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y) +{ + struct cacheinfo *llc_x, *llc_y; + + if (!last_level_cache_is_valid(cpu_x) || + !last_level_cache_is_valid(cpu_y)) + return false; + + llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1); + llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1); + + return cache_leaves_are_shared(llc_x, llc_y); +} + +#ifdef CONFIG_OF + +static bool of_check_cache_nodes(struct device_node *np); + /* OF properties to query for a given cache type */ struct cache_type_info { const char *size_prop; @@ -136,6 +184,54 @@ static bool cache_node_is_unified(struct cacheinfo *this_leaf, return of_property_read_bool(np, "cache-unified"); } +static bool match_cache_node(struct device_node *cpu, + const struct device_node *cache_node) +{ + struct device_node *prev, *cache = of_find_next_cache_node(cpu); + + while (cache) { + if (cache == cache_node) { + of_node_put(cache); + return true; + } + + prev = cache; + cache = of_find_next_cache_node(cache); + of_node_put(prev); + } + + return false; +} + +#ifndef arch_compact_of_hwid +#define arch_compact_of_hwid(_x) (_x) +#endif + +static void cache_of_set_id(struct cacheinfo *this_leaf, + struct device_node *cache_node) +{ + struct device_node *cpu; + u32 min_id = ~0; + + for_each_of_cpu_node(cpu) { + u64 id = of_get_cpu_hwid(cpu, 0); + + id = arch_compact_of_hwid(id); + if (FIELD_GET(GENMASK_ULL(63, 32), id)) { + of_node_put(cpu); + return; + } + + if (match_cache_node(cpu, cache_node)) + min_id = min(min_id, id); + } + + if (min_id != ~0) { + this_leaf->id = min_id; + this_leaf->attributes |= CACHE_ID; + } +} + static void cache_of_set_props(struct cacheinfo *this_leaf, struct device_node *np) { @@ -151,39 +247,32 @@ static void cache_of_set_props(struct cacheinfo *this_leaf, cache_get_line_size(this_leaf, np); cache_nr_sets(this_leaf, np); cache_associativity(this_leaf); + cache_of_set_id(this_leaf, np); } static int cache_setup_of_node(unsigned int cpu) { - struct device_node *np; struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if fw_token is already populated */ - if (this_cpu_ci->info_list->fw_token) { - return 0; - } - - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; + struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); if (!np) { pr_err("Failed to find cpu%d device node\n", cpu); return -ENOENT; } + if (!of_check_cache_nodes(np)) { + return -ENOENT; + } + while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; - if (this_leaf->level != 1) + this_leaf = per_cpu_cacheinfo_idx(cpu, index); + if (this_leaf->level != 1) { + struct device_node *prev __free(device_node) = np; np = of_find_next_cache_node(np); - else - np = of_node_get(np);/* cpu node itself */ - if 
(!np) - break; + if (!np) + break; + } cache_of_set_props(this_leaf, np); this_leaf->fw_token = np; index++; @@ -194,18 +283,87 @@ static int cache_setup_of_node(unsigned int cpu) return 0; } -#else -static inline int cache_setup_of_node(unsigned int cpu) { return 0; } -static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, - struct cacheinfo *sib_leaf) + +static bool of_check_cache_nodes(struct device_node *np) { - /* - * For non-DT/ACPI systems, assume unique level 1 caches, system-wide - * shared caches for all other levels. This will be used only if - * arch specific code has not populated shared_cpu_map - */ - return !(this_leaf->level == 1); + if (of_property_present(np, "cache-size") || + of_property_present(np, "i-cache-size") || + of_property_present(np, "d-cache-size") || + of_property_present(np, "cache-unified")) + return true; + + struct device_node *next __free(device_node) = of_find_next_cache_node(np); + if (next) { + return true; + } + + return false; +} + +static int of_count_cache_leaves(struct device_node *np) +{ + unsigned int leaves = 0; + + if (of_property_present(np, "cache-size")) + ++leaves; + if (of_property_present(np, "i-cache-size")) + ++leaves; + if (of_property_present(np, "d-cache-size")) + ++leaves; + + if (!leaves) { + /* The '[i-|d-|]cache-size' property is required, but + * if absent, fallback on the 'cache-unified' property. + */ + if (of_property_read_bool(np, "cache-unified")) + return 1; + else + return 2; + } + + return leaves; } + +int init_of_cache_level(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); + unsigned int levels = 0, leaves, level; + + if (!of_check_cache_nodes(np)) { + return -ENOENT; + } + + leaves = of_count_cache_leaves(np); + if (leaves > 0) + levels = 1; + + while (1) { + struct device_node *prev __free(device_node) = np; + np = of_find_next_cache_node(np); + if (!np) + break; + + if (!of_device_is_compatible(np, "cache")) + return -EINVAL; + if (of_property_read_u32(np, "cache-level", &level)) + return -EINVAL; + if (level <= levels) + return -EINVAL; + + leaves += of_count_cache_leaves(np); + levels = level; + } + + this_cpu_ci->num_levels = levels; + this_cpu_ci->num_leaves = leaves; + + return 0; +} + +#else +static inline int cache_setup_of_node(unsigned int cpu) { return 0; } +int init_of_cache_level(unsigned int cpu) { return 0; } #endif int __weak cache_setup_acpi(unsigned int cpu) @@ -215,42 +373,69 @@ int __weak cache_setup_acpi(unsigned int cpu) unsigned int coherency_max_size; +static int cache_setup_properties(unsigned int cpu) +{ + int ret = 0; + + if (of_have_populated_dt()) + ret = cache_setup_of_node(cpu); + else if (!acpi_disabled) + ret = cache_setup_acpi(cpu); + + // Assume there is no cache information available in DT/ACPI from now. 
+ if (ret && use_arch_cache_info()) + use_arch_info = true; + + return ret; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; - unsigned int index; + unsigned int index, sib_index; int ret = 0; if (this_cpu_ci->cpu_map_populated) return 0; - if (of_have_populated_dt()) - ret = cache_setup_of_node(cpu); - else if (!acpi_disabled) - ret = cache_setup_acpi(cpu); - - if (ret) - return ret; + /* + * skip setting up cache properties if LLC is valid, just need + * to update the shared cpu_map if the cache attributes were + * populated early before all the cpus are brought online + */ + if (!last_level_cache_is_valid(cpu) && !use_arch_info) { + ret = cache_setup_properties(cpu); + if (ret) + return ret; + } for (index = 0; index < cache_leaves(cpu); index++) { unsigned int i; - this_leaf = this_cpu_ci->info_list + index; - /* skip if shared_cpu_map is already populated */ - if (!cpumask_empty(&this_leaf->shared_cpu_map)) - continue; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); for_each_online_cpu(i) { - struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - - if (i == cpu || !sib_cpu_ci->info_list) + if (i == cpu || !per_cpu_cacheinfo(i)) continue;/* skip if itself or no cacheinfo */ - sib_leaf = sib_cpu_ci->info_list + index; - if (cache_leaves_are_shared(this_leaf, sib_leaf)) { - cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); - cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) { + sib_leaf = per_cpu_cacheinfo_idx(i, sib_index); + + /* + * Comparing cache IDs only makes sense if the leaves + * belong to the same cache level of same type. Skip + * the check if level and type do not match. + */ + if (sib_leaf->level != this_leaf->level || + sib_leaf->type != this_leaf->type) + continue; + + if (cache_leaves_are_shared(this_leaf, sib_leaf)) { + cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); + cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + break; + } } } /* record the maximum cache line size */ @@ -258,6 +443,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) coherency_max_size = this_leaf->coherency_line_size; } + /* shared_cpu_map is now populated for the cpu */ + this_cpu_ci->cpu_map_populated = true; return 0; } @@ -265,27 +452,37 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; - unsigned int sibling, index; + unsigned int sibling, index, sib_index; for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; + this_leaf = per_cpu_cacheinfo_idx(cpu, index); for_each_cpu(sibling, &this_leaf->shared_cpu_map) { - struct cpu_cacheinfo *sib_cpu_ci; - - if (sibling == cpu) /* skip itself */ - continue; - - sib_cpu_ci = get_cpu_cacheinfo(sibling); - if (!sib_cpu_ci->info_list) - continue; + if (sibling == cpu || !per_cpu_cacheinfo(sibling)) + continue;/* skip if itself or no cacheinfo */ - sib_leaf = sib_cpu_ci->info_list + index; - cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); - cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); + for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) { + sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index); + + /* + * Comparing cache IDs only makes sense if the leaves + * belong to the same cache level of same type. 
Skip + * the check if level and type do not match. + */ + if (sib_leaf->level != this_leaf->level || + sib_leaf->type != this_leaf->type) + continue; + + if (cache_leaves_are_shared(this_leaf, sib_leaf)) { + cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); + cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); + break; + } + } } - if (of_have_populated_dt()) - of_node_put(this_leaf->fw_token); } + + /* cpu is no longer populated in the shared map */ + this_cpu_ci->cpu_map_populated = false; } static void free_cache_attributes(unsigned int cpu) @@ -294,9 +491,11 @@ static void free_cache_attributes(unsigned int cpu) return; cache_shared_cpu_map_remove(cpu); +} - kfree(per_cpu_cacheinfo(cpu)); - per_cpu_cacheinfo(cpu) = NULL; +int __weak early_cache_level(unsigned int cpu) +{ + return -ENOENT; } int __weak init_cache_level(unsigned int cpu) @@ -309,25 +508,113 @@ int __weak populate_cache_leaves(unsigned int cpu) return -ENOENT; } -static int detect_cache_attributes(unsigned int cpu) +static inline int allocate_cache_info(int cpu) { + per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC); + if (!per_cpu_cacheinfo(cpu)) { + cache_leaves(cpu) = 0; + return -ENOMEM; + } + + return 0; +} + +int fetch_cache_info(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int levels = 0, split_levels = 0; int ret; + if (acpi_disabled) { + ret = init_of_cache_level(cpu); + } else { + ret = acpi_get_cache_info(cpu, &levels, &split_levels); + if (!ret) { + this_cpu_ci->num_levels = levels; + /* + * This assumes that: + * - there cannot be any split caches (data/instruction) + * above a unified cache + * - data/instruction caches come by pair + */ + this_cpu_ci->num_leaves = levels + split_levels; + } + } + + if (ret || !cache_leaves(cpu)) { + ret = early_cache_level(cpu); + if (ret) + return ret; + + if (!cache_leaves(cpu)) + return -ENOENT; + + this_cpu_ci->early_ci_levels = true; + } + + return allocate_cache_info(cpu); +} + +static inline int init_level_allocate_ci(unsigned int cpu) +{ + unsigned int early_leaves = cache_leaves(cpu); + + /* Since early initialization/allocation of the cacheinfo is allowed + * via fetch_cache_info() and this also gets called as CPU hotplug + * callbacks via cacheinfo_cpu_online, the init/alloc can be skipped + * as it will happen only once (the cacheinfo memory is never freed). + * Just populate the cacheinfo. However, if the cacheinfo has been + * allocated early through the arch-specific early_cache_level() call, + * there is a chance the info is wrong (this can happen on arm64). In + * that case, call init_cache_level() anyway to give the arch-specific + * code a chance to make things right. + */ + if (per_cpu_cacheinfo(cpu) && !ci_cacheinfo(cpu)->early_ci_levels) + return 0; + if (init_cache_level(cpu) || !cache_leaves(cpu)) return -ENOENT; - per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), - sizeof(struct cacheinfo), GFP_KERNEL); - if (per_cpu_cacheinfo(cpu) == NULL) - return -ENOMEM; + /* + * Now that we have properly initialized the cache level info, make + * sure we don't try to do that again the next time we are called + * (e.g. as CPU hotplug callbacks). + */ + ci_cacheinfo(cpu)->early_ci_levels = false; /* - * populate_cache_leaves() may completely setup the cache leaves and - * shared_cpu_map or it may leave it partially setup. + * Some architectures (e.g., x86) do not use early initialization. + * Allocate memory now in such case. 
*/ - ret = populate_cache_leaves(cpu); + if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu)) + return 0; + + kfree(per_cpu_cacheinfo(cpu)); + return allocate_cache_info(cpu); +} + +int detect_cache_attributes(unsigned int cpu) +{ + int ret; + + ret = init_level_allocate_ci(cpu); if (ret) - goto free_ci; + return ret; + + /* + * If LLC is valid the cache leaves were already populated so just go to + * update the cpu map. + */ + if (!last_level_cache_is_valid(cpu)) { + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. + */ + ret = populate_cache_leaves(cpu); + if (ret) + goto free_ci; + } + /* * For systems using DT for cache hierarchy, fw_token * and shared_cpu_map will be set up here only if they are @@ -613,7 +900,6 @@ static int cache_add_dev(unsigned int cpu) int rc; struct device *ci_dev, *parent; struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); const struct attribute_group **cache_groups; rc = cpu_cache_sysfs_init(cpu); @@ -622,7 +908,7 @@ static int cache_add_dev(unsigned int cpu) parent = per_cpu_cache_dev(cpu); for (i = 0; i < cache_leaves(cpu); i++) { - this_leaf = this_cpu_ci->info_list + i; + this_leaf = per_cpu_cacheinfo_idx(cpu, i); if (this_leaf->disable_sysfs) continue; if (this_leaf->type == CACHE_TYPE_NOCACHE) @@ -644,24 +930,111 @@ err: return rc; } +static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu, + cpumask_t **map) +{ + struct cacheinfo *llc, *sib_llc; + unsigned int sibling; + + if (!last_level_cache_is_valid(cpu)) + return 0; + + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); + + if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED) + return 0; + + if (online) { + *map = &llc->shared_cpu_map; + return cpumask_weight(*map); + } + + /* shared_cpu_map of offlined CPU will be cleared, so use sibling map */ + for_each_cpu(sibling, &llc->shared_cpu_map) { + if (sibling == cpu || !last_level_cache_is_valid(sibling)) + continue; + sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1); + *map = &sib_llc->shared_cpu_map; + return cpumask_weight(*map); + } + + return 0; +} + +/* + * Calculate the size of the per-CPU data cache slice. This can be + * used to estimate the size of the data cache slice that can be used + * by one CPU under ideal circumstances. UNIFIED caches are counted + * in addition to DATA caches. So, please consider code cache usage + * when use the result. + * + * Because the cache inclusive/non-inclusive information isn't + * available, we just use the size of the per-CPU slice of LLC to make + * the result more predictable across architectures. 
+ */ +static void update_per_cpu_data_slice_size_cpu(unsigned int cpu) +{ + struct cpu_cacheinfo *ci; + struct cacheinfo *llc; + unsigned int nr_shared; + + if (!last_level_cache_is_valid(cpu)) + return; + + ci = ci_cacheinfo(cpu); + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); + + if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED) + return; + + nr_shared = cpumask_weight(&llc->shared_cpu_map); + if (nr_shared) + ci->per_cpu_data_slice_size = llc->size / nr_shared; +} + +static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu, + cpumask_t *cpu_map) +{ + unsigned int icpu; + + for_each_cpu(icpu, cpu_map) { + if (!cpu_online && icpu == cpu) + continue; + update_per_cpu_data_slice_size_cpu(icpu); + setup_pcp_cacheinfo(icpu); + } +} + static int cacheinfo_cpu_online(unsigned int cpu) { int rc = detect_cache_attributes(cpu); + cpumask_t *cpu_map; if (rc) return rc; rc = cache_add_dev(cpu); if (rc) - free_cache_attributes(cpu); + goto err; + if (cpu_map_shared_cache(true, cpu, &cpu_map)) + update_per_cpu_data_slice_size(true, cpu, cpu_map); + return 0; +err: + free_cache_attributes(cpu); return rc; } static int cacheinfo_cpu_pre_down(unsigned int cpu) { + cpumask_t *cpu_map; + unsigned int nr_shared; + + nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map); if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map)) cpu_cache_sysfs_exit(cpu); free_cache_attributes(cpu); + if (nr_shared > 1) + update_per_cpu_data_slice_size(false, cpu, cpu_map); return 0; } diff --git a/drivers/base/class.c b/drivers/base/class.c index 7476f393df97..2526c57d924e 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -16,12 +16,56 @@ #include <linux/kdev_t.h> #include <linux/err.h> #include <linux/slab.h> -#include <linux/genhd.h> +#include <linux/blkdev.h> #include <linux/mutex.h> #include "base.h" +/* /sys/class */ +static struct kset *class_kset; + #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) +/** + * class_to_subsys - Turn a struct class into a struct subsys_private + * + * @class: pointer to the struct bus_type to look up + * + * The driver core internals need to work on the subsys_private structure, not + * the external struct class pointer. This function walks the list of + * registered classes in the system and finds the matching one and returns the + * internal struct subsys_private that relates to that class. + * + * Note, the reference count of the return value is INCREMENTED if it is not + * NULL. A call to subsys_put() must be done when finished with the pointer in + * order for it to be properly freed. 
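A minimal sketch of the lookup/put pattern this comment describes, not taken from the patch itself ("example_class" stands in for any registered struct class):

	struct subsys_private *sp = class_to_subsys(&example_class);

	if (!sp)
		return;			/* the class is not registered (yet) */

	/* operate on sp->subsys.kobj, sp->klist_devices, sp->interfaces, ... */

	subsys_put(sp);			/* balance the reference taken by class_to_subsys() */
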
+ */ +struct subsys_private *class_to_subsys(const struct class *class) +{ + struct subsys_private *sp = NULL; + struct kobject *kobj; + + if (!class || !class_kset) + return NULL; + + spin_lock(&class_kset->list_lock); + + if (list_empty(&class_kset->list)) + goto done; + + list_for_each_entry(kobj, &class_kset->list, entry) { + struct kset *kset = container_of(kobj, struct kset, kobj); + + sp = container_of_const(kset, struct subsys_private, subsys); + if (sp->class == class) + goto done; + } + sp = NULL; +done: + sp = subsys_get(sp); + spin_unlock(&class_kset->list_lock); + return sp; +} + static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -49,7 +93,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, static void class_release(struct kobject *kobj) { struct subsys_private *cp = to_subsys_private(kobj); - struct class *class = cp->class; + const struct class *class = cp->class; pr_debug("class '%s': release.\n", class->name); @@ -59,13 +103,14 @@ static void class_release(struct kobject *kobj) pr_debug("class '%s' does not have a release() function, " "be careful\n", class->name); + lockdep_unregister_key(&cp->lock_key); kfree(cp); } -static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj) +static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj) { - struct subsys_private *cp = to_subsys_private(kobj); - struct class *class = cp->class; + const struct subsys_private *cp = to_subsys_private(kobj); + const struct class *class = cp->class; return class->ns_type; } @@ -75,48 +120,40 @@ static const struct sysfs_ops class_sysfs_ops = { .store = class_attr_store, }; -static struct kobj_type class_ktype = { +static const struct kobj_type class_ktype = { .sysfs_ops = &class_sysfs_ops, .release = class_release, .child_ns_type = class_child_ns_type, }; -/* Hotplug events for classes go to the class subsys */ -static struct kset *class_kset; - - -int class_create_file_ns(struct class *cls, const struct class_attribute *attr, +int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { + struct subsys_private *sp = class_to_subsys(cls); int error; - if (cls) - error = sysfs_create_file_ns(&cls->p->subsys.kobj, - &attr->attr, ns); - else - error = -EINVAL; + if (!sp) + return -EINVAL; + + error = sysfs_create_file_ns(&sp->subsys.kobj, &attr->attr, ns); + subsys_put(sp); + return error; } +EXPORT_SYMBOL_GPL(class_create_file_ns); -void class_remove_file_ns(struct class *cls, const struct class_attribute *attr, +void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { - if (cls) - sysfs_remove_file_ns(&cls->p->subsys.kobj, &attr->attr, ns); -} + struct subsys_private *sp = class_to_subsys(cls); -static struct class *class_get(struct class *cls) -{ - if (cls) - kset_get(&cls->p->subsys); - return cls; -} + if (!sp) + return; -static void class_put(struct class *cls) -{ - if (cls) - kset_put(&cls->p->subsys); + sysfs_remove_file_ns(&sp->subsys.kobj, &attr->attr, ns); + subsys_put(sp); } +EXPORT_SYMBOL_GPL(class_remove_file_ns); static struct device *klist_class_to_dev(struct klist_node *n) { @@ -138,82 +175,85 @@ static void klist_class_dev_put(struct klist_node *n) put_device(dev); } -static int class_add_groups(struct class *cls, - const struct attribute_group **groups) -{ - return sysfs_create_groups(&cls->p->subsys.kobj, groups); -} - -static void class_remove_groups(struct 
class *cls, - const struct attribute_group **groups) -{ - return sysfs_remove_groups(&cls->p->subsys.kobj, groups); -} - -int __class_register(struct class *cls, struct lock_class_key *key) +int class_register(const struct class *cls) { struct subsys_private *cp; + struct lock_class_key *key; int error; pr_debug("device class '%s': registering\n", cls->name); + if (cls->ns_type && !cls->namespace) { + pr_err("%s: class '%s' does not have namespace\n", + __func__, cls->name); + return -EINVAL; + } + if (!cls->ns_type && cls->namespace) { + pr_err("%s: class '%s' does not have ns_type\n", + __func__, cls->name); + return -EINVAL; + } + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->interfaces); kset_init(&cp->glue_dirs); + key = &cp->lock_key; + lockdep_register_key(key); __mutex_init(&cp->mutex, "subsys mutex", key); error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); - if (error) { - kfree(cp); - return error; - } - - /* set the default /sys/dev directory for devices of this class */ - if (!cls->dev_kobj) - cls->dev_kobj = sysfs_dev_char_kobj; + if (error) + goto err_out; -#if defined(CONFIG_BLOCK) - /* let the block class directory show up in the root of sysfs */ - if (!sysfs_deprecated || cls != &block_class) - cp->subsys.kobj.kset = class_kset; -#else cp->subsys.kobj.kset = class_kset; -#endif cp->subsys.kobj.ktype = &class_ktype; cp->class = cls; - cls->p = cp; error = kset_register(&cp->subsys); + if (error) + goto err_out; + + error = sysfs_create_groups(&cp->subsys.kobj, cls->class_groups); if (error) { - kfree(cp); - return error; + kobject_del(&cp->subsys.kobj); + kfree_const(cp->subsys.kobj.name); + goto err_out; } - error = class_add_groups(class_get(cls), cls->class_groups); - class_put(cls); + return 0; + +err_out: + lockdep_unregister_key(key); + kfree(cp); return error; } -EXPORT_SYMBOL_GPL(__class_register); +EXPORT_SYMBOL_GPL(class_register); -void class_unregister(struct class *cls) +void class_unregister(const struct class *cls) { + struct subsys_private *sp = class_to_subsys(cls); + + if (!sp) + return; + pr_debug("device class '%s': unregistering\n", cls->name); - class_remove_groups(cls, cls->class_groups); - kset_unregister(&cls->p->subsys); + + sysfs_remove_groups(&sp->subsys.kobj, cls->class_groups); + kset_unregister(&sp->subsys); + subsys_put(sp); } +EXPORT_SYMBOL_GPL(class_unregister); -static void class_create_release(struct class *cls) +static void class_create_release(const struct class *cls) { pr_debug("%s called for %s\n", __func__, cls->name); kfree(cls); } /** - * __class_create - create a struct class structure - * @owner: pointer to the module that is to "own" this struct class + * class_create - create a struct class structure * @name: pointer to a string for the name of this class. - * @key: the lock_class_key for this class; used by mutex lock debugging * * This is used to create a struct class pointer that can then be used * in calls to device_create(). @@ -223,8 +263,7 @@ static void class_create_release(struct class *cls) * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). 
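A minimal sketch of the new single-argument class_create() API introduced by this hunk (the "foo" class name and the foo_init/foo_exit hooks are illustrative only, not part of the patch):

	static struct class *foo_class;

	static int __init foo_init(void)
	{
		foo_class = class_create("foo");
		if (IS_ERR(foo_class))
			return PTR_ERR(foo_class);
		return 0;
	}

	static void __exit foo_exit(void)
	{
		class_destroy(foo_class);
	}
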
*/ -struct class *__class_create(struct module *owner, const char *name, - struct lock_class_key *key) +struct class *class_create(const char *name) { struct class *cls; int retval; @@ -236,10 +275,9 @@ struct class *__class_create(struct module *owner, const char *name, } cls->name = name; - cls->owner = owner; cls->class_release = class_create_release; - retval = __class_register(cls, key); + retval = class_register(cls); if (retval) goto error; @@ -249,7 +287,7 @@ error: kfree(cls); return ERR_PTR(retval); } -EXPORT_SYMBOL_GPL(__class_create); +EXPORT_SYMBOL_GPL(class_create); /** * class_destroy - destroys a struct class structure @@ -258,13 +296,14 @@ EXPORT_SYMBOL_GPL(__class_create); * Note, the pointer to be destroyed must have been created with a call * to class_create(). */ -void class_destroy(struct class *cls) +void class_destroy(const struct class *cls) { - if ((cls == NULL) || (IS_ERR(cls))) + if (IS_ERR_OR_NULL(cls)) return; class_unregister(cls); } +EXPORT_SYMBOL_GPL(class_destroy); /** * class_dev_iter_init - initialize class device iterator @@ -278,15 +317,24 @@ void class_destroy(struct class *cls) * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ -void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, - struct device *start, const struct device_type *type) +void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, + const struct device *start, const struct device_type *type) { + struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; + memset(iter, 0, sizeof(*iter)); + if (!sp) { + pr_crit("%s: class %p was not registered yet\n", + __func__, class); + return; + } + if (start) start_knode = &start->p->knode_class; - klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); + klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; + iter->sp = sp; } EXPORT_SYMBOL_GPL(class_dev_iter_init); @@ -307,6 +355,9 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter) struct klist_node *knode; struct device *dev; + if (!iter->sp) + return NULL; + while (1) { knode = klist_next(&iter->ki); if (!knode) @@ -328,6 +379,7 @@ EXPORT_SYMBOL_GPL(class_dev_iter_next); void class_dev_iter_exit(struct class_dev_iter *iter) { klist_iter_exit(&iter->ki); + subsys_put(iter->sp); } EXPORT_SYMBOL_GPL(class_dev_iter_exit); @@ -349,17 +401,18 @@ EXPORT_SYMBOL_GPL(class_dev_iter_exit); * @fn is allowed to do anything including calling back into class * code. There's no locking restriction. */ -int class_for_each_device(struct class *class, struct device *start, - void *data, int (*fn)(struct device *, void *)) +int class_for_each_device(const struct class *class, const struct device *start, + void *data, device_iter_t fn) { + struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; int error = 0; if (!class) return -EINVAL; - if (!class->p) { - WARN(1, "%s called for class '%s' before it was initialized", + if (!sp) { + WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return -EINVAL; } @@ -371,6 +424,7 @@ int class_for_each_device(struct class *class, struct device *start, break; } class_dev_iter_exit(&iter); + subsys_put(sp); return error; } @@ -396,17 +450,17 @@ EXPORT_SYMBOL_GPL(class_for_each_device); * @match is allowed to do anything including calling back into class * code. There's no locking restriction. 
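With the constified signature below, callers pass a device_match_t callback; a hedged example (match_by_name, foo_class and the "foo0" name are placeholders, and foo_class is assumed to point to a registered class). A successful lookup returns a referenced device that must be dropped with put_device():

	static int match_by_name(struct device *dev, const void *data)
	{
		return !strcmp(dev_name(dev), data);
	}

	struct device *dev = class_find_device(foo_class, NULL, "foo0", match_by_name);

	if (dev) {
		/* ... use the device ... */
		put_device(dev);	/* drop the reference class_find_device() took */
	}
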
*/ -struct device *class_find_device(struct class *class, struct device *start, - const void *data, - int (*match)(struct device *, const void *)) +struct device *class_find_device(const struct class *class, const struct device *start, + const void *data, device_match_t match) { + struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; if (!class) return NULL; - if (!class->p) { - WARN(1, "%s called for class '%s' before it was initialized", + if (!sp) { + WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return NULL; } @@ -419,6 +473,7 @@ struct device *class_find_device(struct class *class, struct device *start, } } class_dev_iter_exit(&iter); + subsys_put(sp); return dev; } @@ -426,54 +481,74 @@ EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { - struct class *parent; + struct subsys_private *sp; + const struct class *parent; struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; - parent = class_get(class_intf->class); - if (!parent) + parent = class_intf->class; + sp = class_to_subsys(parent); + if (!sp) return -EINVAL; - mutex_lock(&parent->p->mutex); - list_add_tail(&class_intf->node, &parent->p->interfaces); + /* + * Reference in sp is now incremented and will be dropped when + * the interface is removed in the call to class_interface_unregister() + */ + + mutex_lock(&sp->mutex); + list_add_tail(&class_intf->node, &sp->interfaces); if (class_intf->add_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) - class_intf->add_dev(dev, class_intf); + class_intf->add_dev(dev); class_dev_iter_exit(&iter); } - mutex_unlock(&parent->p->mutex); + mutex_unlock(&sp->mutex); return 0; } +EXPORT_SYMBOL_GPL(class_interface_register); void class_interface_unregister(struct class_interface *class_intf) { - struct class *parent = class_intf->class; + struct subsys_private *sp; + const struct class *parent = class_intf->class; struct class_dev_iter iter; struct device *dev; if (!parent) return; - mutex_lock(&parent->p->mutex); + sp = class_to_subsys(parent); + if (!sp) + return; + + mutex_lock(&sp->mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) - class_intf->remove_dev(dev, class_intf); + class_intf->remove_dev(dev); class_dev_iter_exit(&iter); } - mutex_unlock(&parent->p->mutex); + mutex_unlock(&sp->mutex); - class_put(parent); + /* + * Decrement the reference count twice, once for the class_to_subsys() + * call in the start of this function, and the second one from the + * reference increment in class_interface_register() + */ + subsys_put(sp); + subsys_put(sp); } +EXPORT_SYMBOL_GPL(class_interface_unregister); -ssize_t show_class_attr_string(struct class *class, - struct class_attribute *attr, char *buf) +ssize_t show_class_attr_string(const struct class *class, + const struct class_attribute *attr, char *buf) { struct class_attribute_string *cs; @@ -526,30 +601,10 @@ EXPORT_SYMBOL_GPL(class_compat_unregister); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link should be created */ -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +int class_compat_create_link(struct class_compat *cls, struct device *dev) { - int error; 
- - error = sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); - if (error) - return error; - - /* - * Optionally add a "device" link (typically to the parent), as a - * class device would have one and we want to provide as much - * backwards compatibility as possible. - */ - if (device_link) { - error = sysfs_create_link(&dev->kobj, &device_link->kobj, - "device"); - if (error) - sysfs_remove_link(cls->kobj, dev_name(dev)); - } - - return error; + return sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_create_link); @@ -558,18 +613,38 @@ EXPORT_SYMBOL_GPL(class_compat_create_link); * a bus device * @cls: the compatibility class * @dev: the target bus device - * @device_link: an optional device to which a "device" link was previously - * created */ -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link) +void class_compat_remove_link(struct class_compat *cls, struct device *dev) { - if (device_link) - sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); +/** + * class_is_registered - determine if at this moment in time, a class is + * registered in the driver core or not. + * @class: the class to check + * + * Returns a boolean to state if the class is registered in the driver core + * or not. Note that the value could switch right after this call is made, + * so only use this in places where you "know" it is safe to do so (usually + * to determine if the specific class has been registered yet or not). + * + * Be careful in using this. + */ +bool class_is_registered(const struct class *class) +{ + struct subsys_private *sp = class_to_subsys(class); + bool is_initialized = false; + + if (sp) { + is_initialized = true; + subsys_put(sp); + } + return is_initialized; +} +EXPORT_SYMBOL_GPL(class_is_registered); + int __init classes_init(void) { class_kset = kset_create_and_add("class", NULL, NULL); @@ -577,11 +652,3 @@ int __init classes_init(void) return -ENOMEM; return 0; } - -EXPORT_SYMBOL_GPL(class_create_file_ns); -EXPORT_SYMBOL_GPL(class_remove_file_ns); -EXPORT_SYMBOL_GPL(class_unregister); -EXPORT_SYMBOL_GPL(class_destroy); - -EXPORT_SYMBOL_GPL(class_interface_register); -EXPORT_SYMBOL_GPL(class_interface_unregister); diff --git a/drivers/base/component.c b/drivers/base/component.c index 5e79299f6c3f..024ad9471b8a 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -1,17 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* * Componentized device handling. - * - * This is work in progress. We gather up the component devices into a list, - * and bind them when instructed. At the moment, we're specific to the DRM - * subsystem, and only handles one master device, but this doesn't have to be - * the case. 
*/ #include <linux/component.h> #include <linux/device.h> -#include <linux/kref.h> #include <linux/list.h> #include <linux/mutex.h> +#include <linux/of.h> #include <linux/slab.h> #include <linux/debugfs.h> @@ -58,7 +53,7 @@ struct component_match { struct component_match_array *compare; }; -struct master { +struct aggregate_device { struct list_head node; bool bound; @@ -69,7 +64,7 @@ struct master { struct component { struct list_head node; - struct master *master; + struct aggregate_device *adev; bool bound; const struct component_ops *ops; @@ -79,7 +74,7 @@ struct component { static DEFINE_MUTEX(component_mutex); static LIST_HEAD(component_list); -static LIST_HEAD(masters); +static LIST_HEAD(aggregate_devices); #ifdef CONFIG_DEBUG_FS @@ -87,22 +82,22 @@ static struct dentry *component_debugfs_dir; static int component_devices_show(struct seq_file *s, void *data) { - struct master *m = s->private; + struct aggregate_device *m = s->private; struct component_match *match = m->match; size_t i; mutex_lock(&component_mutex); - seq_printf(s, "%-40s %20s\n", "master name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); - seq_printf(s, "%-40s %20s\n\n", + seq_printf(s, "%-50s %20s\n", "aggregate_device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); - seq_printf(s, "%-40s %20s\n", "device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n", "device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; - seq_printf(s, "%-40s %20s\n", + seq_printf(s, "%-50s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? 
"bound" : "not bound") : "not registered"); } @@ -122,46 +117,46 @@ static int __init component_debug_init(void) core_initcall(component_debug_init); -static void component_master_debugfs_add(struct master *m) +static void component_debugfs_add(struct aggregate_device *m) { debugfs_create_file(dev_name(m->parent), 0444, component_debugfs_dir, m, &component_devices_fops); } -static void component_master_debugfs_del(struct master *m) +static void component_debugfs_del(struct aggregate_device *m) { - debugfs_remove(debugfs_lookup(dev_name(m->parent), component_debugfs_dir)); + debugfs_lookup_and_remove(dev_name(m->parent), component_debugfs_dir); } #else -static void component_master_debugfs_add(struct master *m) +static void component_debugfs_add(struct aggregate_device *m) { } -static void component_master_debugfs_del(struct master *m) +static void component_debugfs_del(struct aggregate_device *m) { } #endif -static struct master *__master_find(struct device *parent, +static struct aggregate_device *__aggregate_find(struct device *parent, const struct component_master_ops *ops) { - struct master *m; + struct aggregate_device *m; - list_for_each_entry(m, &masters, node) + list_for_each_entry(m, &aggregate_devices, node) if (m->parent == parent && (!ops || m->ops == ops)) return m; return NULL; } -static struct component *find_component(struct master *master, +static struct component *find_component(struct aggregate_device *adev, struct component_match_array *mc) { struct component *c; list_for_each_entry(c, &component_list, node) { - if (c->master && c->master != master) + if (c->adev && c->adev != adev) continue; if (mc->compare && mc->compare(c->dev, mc->data)) @@ -175,101 +170,103 @@ static struct component *find_component(struct master *master, return NULL; } -static int find_components(struct master *master) +static int find_components(struct aggregate_device *adev) { - struct component_match *match = master->match; + struct component_match *match = adev->match; size_t i; int ret = 0; /* * Scan the array of match functions and attach - * any components which are found to this master. + * any components which are found to this adev. */ for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; struct component *c; - dev_dbg(master->parent, "Looking for component %zu\n", i); + dev_dbg(adev->parent, "Looking for component %zu\n", i); if (match->compare[i].component) continue; - c = find_component(master, mc); + c = find_component(adev, mc); if (!c) { ret = -ENXIO; break; } - dev_dbg(master->parent, "found component %s, duplicate %u\n", dev_name(c->dev), !!c->master); + dev_dbg(adev->parent, "found component %s, duplicate %u\n", + dev_name(c->dev), !!c->adev); - /* Attach this component to the master */ - match->compare[i].duplicate = !!c->master; + /* Attach this component to the adev */ + match->compare[i].duplicate = !!c->adev; match->compare[i].component = c; - c->master = master; + c->adev = adev; } return ret; } -/* Detach component from associated master */ -static void remove_component(struct master *master, struct component *c) +/* Detach component from associated aggregate_device */ +static void remove_component(struct aggregate_device *adev, struct component *c) { size_t i; - /* Detach the component from this master. */ - for (i = 0; i < master->match->num; i++) - if (master->match->compare[i].component == c) - master->match->compare[i].component = NULL; + /* Detach the component from this adev. 
*/ + for (i = 0; i < adev->match->num; i++) + if (adev->match->compare[i].component == c) + adev->match->compare[i].component = NULL; } /* - * Try to bring up a master. If component is NULL, we're interested in - * this master, otherwise it's a component which must be present to try - * and bring up the master. + * Try to bring up an aggregate device. If component is NULL, we're interested + * in this aggregate device, otherwise it's a component which must be present + * to try and bring up the aggregate device. * * Returns 1 for successful bringup, 0 if not ready, or -ve errno. */ -static int try_to_bring_up_master(struct master *master, +static int try_to_bring_up_aggregate_device(struct aggregate_device *adev, struct component *component) { int ret; - dev_dbg(master->parent, "trying to bring up master\n"); + dev_dbg(adev->parent, "trying to bring up adev\n"); - if (find_components(master)) { - dev_dbg(master->parent, "master has incomplete components\n"); + if (find_components(adev)) { + dev_dbg(adev->parent, "master has incomplete components\n"); return 0; } - if (component && component->master != master) { - dev_dbg(master->parent, "master is not for this component (%s)\n", + if (component && component->adev != adev) { + dev_dbg(adev->parent, "master is not for this component (%s)\n", dev_name(component->dev)); return 0; } - if (!devres_open_group(master->parent, NULL, GFP_KERNEL)) + if (!devres_open_group(adev->parent, adev, GFP_KERNEL)) return -ENOMEM; /* Found all components */ - ret = master->ops->bind(master->parent); + ret = adev->ops->bind(adev->parent); if (ret < 0) { - devres_release_group(master->parent, NULL); + devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) - dev_info(master->parent, "master bind failed: %d\n", ret); + dev_info(adev->parent, "adev bind failed: %d\n", ret); return ret; } - master->bound = true; + devres_close_group(adev->parent, NULL); + adev->bound = true; return 1; } static int try_to_bring_up_masters(struct component *component) { - struct master *m; + struct aggregate_device *adev; int ret = 0; - list_for_each_entry(m, &masters, node) { - if (!m->bound) { - ret = try_to_bring_up_master(m, component); + list_for_each_entry(adev, &aggregate_devices, node) { + if (!adev->bound) { + ret = try_to_bring_up_aggregate_device(adev, component); if (ret != 0) break; } @@ -278,15 +275,72 @@ static int try_to_bring_up_masters(struct component *component) return ret; } -static void take_down_master(struct master *master) +static void take_down_aggregate_device(struct aggregate_device *adev) { - if (master->bound) { - master->ops->unbind(master->parent); - devres_release_group(master->parent, NULL); - master->bound = false; + if (adev->bound) { + adev->ops->unbind(adev->parent); + devres_release_group(adev->parent, adev); + adev->bound = false; } } +/** + * component_compare_of - A common component compare function for of_node + * @dev: component device + * @data: @compare_data from component_match_add_release() + * + * A common compare function when compare_data is device of_node. e.g. 
+ * component_match_add_release(masterdev, &match, component_release_of, + * component_compare_of, component_dev_of_node) + */ +int component_compare_of(struct device *dev, void *data) +{ + return device_match_of_node(dev, data); +} +EXPORT_SYMBOL_GPL(component_compare_of); + +/** + * component_release_of - A common component release function for of_node + * @dev: component device + * @data: @compare_data from component_match_add_release() + * + * About the example, Please see component_compare_of(). + */ +void component_release_of(struct device *dev, void *data) +{ + of_node_put(data); +} +EXPORT_SYMBOL_GPL(component_release_of); + +/** + * component_compare_dev - A common component compare function for dev + * @dev: component device + * @data: @compare_data from component_match_add_release() + * + * A common compare function when compare_data is struce device. e.g. + * component_match_add(masterdev, &match, component_compare_dev, component_dev) + */ +int component_compare_dev(struct device *dev, void *data) +{ + return dev == data; +} +EXPORT_SYMBOL_GPL(component_compare_dev); + +/** + * component_compare_dev_name - A common component compare function for device name + * @dev: component device + * @data: @compare_data from component_match_add_release() + * + * A common compare function when compare_data is device name string. e.g. + * component_match_add(masterdev, &match, component_compare_dev_name, + * "component_dev_name") + */ +int component_compare_dev_name(struct device *dev, void *data) +{ + return device_match_name(dev, data); +} +EXPORT_SYMBOL_GPL(component_compare_dev_name); + static void devm_component_match_release(struct device *parent, void *res) { struct component_match *match = res; @@ -324,7 +378,7 @@ static int component_match_realloc(struct component_match *match, size_t num) return 0; } -static void __component_match_add(struct device *master, +static void __component_match_add(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), @@ -344,7 +398,7 @@ static void __component_match_add(struct device *master, return; } - devres_add(master, match); + devres_add(parent, match); *matchptr = match; } @@ -370,13 +424,13 @@ static void __component_match_add(struct device *master, /** * component_match_add_release - add a component match entry with release callback - * @master: device with the aggregate driver + * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @release: release function for @compare_data * @compare: compare function to match against all components * @compare_data: opaque pointer passed to the @compare function * - * Adds a new component match to the list stored in @matchptr, which the @master + * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add(). @@ -388,24 +442,24 @@ static void __component_match_add(struct device *master, * * See also component_match_add() and component_match_add_typed(). 
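Put together with the compare/release helpers added earlier in this file, an aggregate driver's probe path might build its match list roughly as follows (parent, child_np and foo_master_ops are illustrative names only; foo_master_ops is assumed to be a struct component_master_ops with .bind/.unbind callbacks):

	struct component_match *match = NULL;

	component_match_add_release(parent, &match, component_release_of,
				    component_compare_of, of_node_get(child_np));
	if (IS_ERR(match))
		return PTR_ERR(match);

	return component_master_add_with_match(parent, &foo_master_ops, match);
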
*/ -void component_match_add_release(struct device *master, +void component_match_add_release(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), void *compare_data) { - __component_match_add(master, matchptr, release, compare, NULL, + __component_match_add(parent, matchptr, release, compare, NULL, compare_data); } EXPORT_SYMBOL(component_match_add_release); /** * component_match_add_typed - add a component match entry for a typed component - * @master: device with the aggregate driver + * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @compare_typed: compare function to match against all typed components * @compare_data: opaque pointer passed to the @compare function * - * Adds a new component match to the list stored in @matchptr, which the @master + * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add_typed(). @@ -415,32 +469,32 @@ EXPORT_SYMBOL(component_match_add_release); * * See also component_match_add_release() and component_match_add_typed(). */ -void component_match_add_typed(struct device *master, +void component_match_add_typed(struct device *parent, struct component_match **matchptr, int (*compare_typed)(struct device *, int, void *), void *compare_data) { - __component_match_add(master, matchptr, NULL, NULL, compare_typed, + __component_match_add(parent, matchptr, NULL, NULL, compare_typed, compare_data); } EXPORT_SYMBOL(component_match_add_typed); -static void free_master(struct master *master) +static void free_aggregate_device(struct aggregate_device *adev) { - struct component_match *match = master->match; + struct component_match *match = adev->match; int i; - component_master_debugfs_del(master); - list_del(&master->node); + component_debugfs_del(adev); + list_del(&adev->node); if (match) { for (i = 0; i < match->num; i++) { struct component *c = match->compare[i].component; if (c) - c->master = NULL; + c->adev = NULL; } } - kfree(master); + kfree(adev); } /** @@ -459,7 +513,7 @@ int component_master_add_with_match(struct device *parent, const struct component_master_ops *ops, struct component_match *match) { - struct master *master; + struct aggregate_device *adev; int ret; /* Reallocate the match array for its true size */ @@ -467,23 +521,23 @@ int component_master_add_with_match(struct device *parent, if (ret) return ret; - master = kzalloc(sizeof(*master), GFP_KERNEL); - if (!master) + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) return -ENOMEM; - master->parent = parent; - master->ops = ops; - master->match = match; + adev->parent = parent; + adev->ops = ops; + adev->match = match; - component_master_debugfs_add(master); - /* Add to the list of available masters. */ + component_debugfs_add(adev); + /* Add to the list of available aggregate devices. 
*/ mutex_lock(&component_mutex); - list_add(&master->node, &masters); + list_add(&adev->node, &aggregate_devices); - ret = try_to_bring_up_master(master, NULL); + ret = try_to_bring_up_aggregate_device(adev, NULL); if (ret < 0) - free_master(master); + free_aggregate_device(adev); mutex_unlock(&component_mutex); @@ -503,25 +557,43 @@ EXPORT_SYMBOL_GPL(component_master_add_with_match); void component_master_del(struct device *parent, const struct component_master_ops *ops) { - struct master *master; + struct aggregate_device *adev; mutex_lock(&component_mutex); - master = __master_find(parent, ops); - if (master) { - take_down_master(master); - free_master(master); + adev = __aggregate_find(parent, ops); + if (adev) { + take_down_aggregate_device(adev); + free_aggregate_device(adev); } mutex_unlock(&component_mutex); } EXPORT_SYMBOL_GPL(component_master_del); +bool component_master_is_bound(struct device *parent, + const struct component_master_ops *ops) +{ + struct aggregate_device *adev; + + guard(mutex)(&component_mutex); + adev = __aggregate_find(parent, ops); + if (!adev) + return 0; + + return adev->bound; +} +EXPORT_SYMBOL_GPL(component_master_is_bound); + static void component_unbind(struct component *component, - struct master *master, void *data) + struct aggregate_device *adev, void *data) { - WARN_ON(!component->bound); + if (WARN_ON(!component->bound)) + return; + + dev_dbg(adev->parent, "unbinding %s component %p (ops %ps)\n", + dev_name(component->dev), component, component->ops); if (component->ops && component->ops->unbind) - component->ops->unbind(component->dev, master->parent, data); + component->ops->unbind(component->dev, adev->parent, data); component->bound = false; /* Release all resources claimed in the binding of this component */ @@ -539,26 +611,26 @@ static void component_unbind(struct component *component, */ void component_unbind_all(struct device *parent, void *data) { - struct master *master; + struct aggregate_device *adev; struct component *c; size_t i; WARN_ON(!mutex_is_locked(&component_mutex)); - master = __master_find(parent, NULL); - if (!master) + adev = __aggregate_find(parent, NULL); + if (!adev) return; /* Unbind components in reverse order */ - for (i = master->match->num; i--; ) - if (!master->match->compare[i].duplicate) { - c = master->match->compare[i].component; - component_unbind(c, master, data); + for (i = adev->match->num; i--; ) + if (!adev->match->compare[i].duplicate) { + c = adev->match->compare[i].component; + component_unbind(c, adev, data); } } EXPORT_SYMBOL_GPL(component_unbind_all); -static int component_bind(struct component *component, struct master *master, +static int component_bind(struct component *component, struct aggregate_device *adev, void *data) { int ret; @@ -568,7 +640,7 @@ static int component_bind(struct component *component, struct master *master, * This allows us to roll-back a failed component without * affecting anything else. */ - if (!devres_open_group(master->parent, NULL, GFP_KERNEL)) + if (!devres_open_group(adev->parent, NULL, GFP_KERNEL)) return -ENOMEM; /* @@ -577,14 +649,14 @@ static int component_bind(struct component *component, struct master *master, * at the appropriate moment. 
*/ if (!devres_open_group(component->dev, component, GFP_KERNEL)) { - devres_release_group(master->parent, NULL); + devres_release_group(adev->parent, NULL); return -ENOMEM; } - dev_dbg(master->parent, "binding %s (ops %ps)\n", + dev_dbg(adev->parent, "binding %s (ops %ps)\n", dev_name(component->dev), component->ops); - ret = component->ops->bind(component->dev, master->parent, data); + ret = component->ops->bind(component->dev, adev->parent, data); if (!ret) { component->bound = true; @@ -595,16 +667,16 @@ static int component_bind(struct component *component, struct master *master, * can clean those resources up independently. */ devres_close_group(component->dev, NULL); - devres_remove_group(master->parent, NULL); + devres_remove_group(adev->parent, NULL); - dev_info(master->parent, "bound %s (ops %ps)\n", + dev_info(adev->parent, "bound %s (ops %ps)\n", dev_name(component->dev), component->ops); } else { devres_release_group(component->dev, NULL); - devres_release_group(master->parent, NULL); + devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) - dev_err(master->parent, "failed to bind %s (ops %ps): %d\n", + dev_err(adev->parent, "failed to bind %s (ops %ps): %d\n", dev_name(component->dev), component->ops, ret); } @@ -622,31 +694,31 @@ static int component_bind(struct component *component, struct master *master, */ int component_bind_all(struct device *parent, void *data) { - struct master *master; + struct aggregate_device *adev; struct component *c; size_t i; int ret = 0; WARN_ON(!mutex_is_locked(&component_mutex)); - master = __master_find(parent, NULL); - if (!master) + adev = __aggregate_find(parent, NULL); + if (!adev) return -EINVAL; /* Bind components in match order */ - for (i = 0; i < master->match->num; i++) - if (!master->match->compare[i].duplicate) { - c = master->match->compare[i].component; - ret = component_bind(c, master, data); + for (i = 0; i < adev->match->num; i++) + if (!adev->match->compare[i].duplicate) { + c = adev->match->compare[i].component; + ret = component_bind(c, adev, data); if (ret) break; } if (ret != 0) { for (; i > 0; i--) - if (!master->match->compare[i - 1].duplicate) { - c = master->match->compare[i - 1].component; - component_unbind(c, master, data); + if (!adev->match->compare[i - 1].duplicate) { + c = adev->match->compare[i - 1].component; + component_unbind(c, adev, data); } } @@ -675,8 +747,8 @@ static int __component_add(struct device *dev, const struct component_ops *ops, ret = try_to_bring_up_masters(component); if (ret < 0) { - if (component->master) - remove_component(component->master, component); + if (component->adev) + remove_component(component->adev, component); list_del(&component->node); kfree(component); @@ -697,7 +769,7 @@ static int __component_add(struct device *dev, const struct component_ops *ops, * component_bind_all(). See also &struct component_ops. * * @subcomponent must be nonzero and is used to differentiate between multiple - * components registerd on the same device @dev. These components are match + * components registered on the same device @dev. These components are match * using component_match_add_typed(). * * The component needs to be unregistered at driver unload/disconnect by @@ -727,7 +799,7 @@ EXPORT_SYMBOL_GPL(component_add_typed); * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). 
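On the component side this usually reduces to one call in probe() and one in remove(); a hedged sketch (foo_component_ops, foo_bind, foo_unbind, foo_probe and foo_remove are illustrative names, and how they are wired into a bus driver is omitted):

	static const struct component_ops foo_component_ops = {
		.bind	= foo_bind,	/* int  foo_bind(struct device *, struct device *, void *)  */
		.unbind	= foo_unbind,	/* void foo_unbind(struct device *, struct device *, void *) */
	};

	static int foo_probe(struct platform_device *pdev)
	{
		return component_add(&pdev->dev, &foo_component_ops);
	}

	static void foo_remove(struct platform_device *pdev)
	{
		component_del(&pdev->dev, &foo_component_ops);
	}
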
* - * See also component_add_typed() for a variant that allows multipled different + * See also component_add_typed() for a variant that allows multiple different * components on the same device. */ int component_add(struct device *dev, const struct component_ops *ops) @@ -757,9 +829,9 @@ void component_del(struct device *dev, const struct component_ops *ops) break; } - if (component && component->master) { - take_down_master(component->master); - remove_component(component->master, component); + if (component && component->adev) { + take_down_aggregate_device(component->adev); + remove_component(component->adev, component); } mutex_unlock(&component_mutex); diff --git a/drivers/base/container.c b/drivers/base/container.c index 1ba42d2d3532..f40588ebc3f5 100644 --- a/drivers/base/container.c +++ b/drivers/base/container.c @@ -24,7 +24,7 @@ static int container_offline(struct device *dev) return cdev->offline ? cdev->offline(cdev) : 0; } -struct bus_type container_subsys = { +const struct bus_type container_subsys = { .name = CONTAINER_BUS_NAME, .dev_name = CONTAINER_BUS_NAME, .online = trivial_online, diff --git a/drivers/base/core.c b/drivers/base/core.c index cadcade65825..40de2f51a1b1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,54 +9,49 @@ */ #include <linux/acpi.h> +#include <linux/blkdev.h> +#include <linux/cleanup.h> #include <linux/cpufreq.h> #include <linux/device.h> +#include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/string.h> #include <linux/kdev_t.h> +#include <linux/kstrtox.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> -#include <linux/genhd.h> -#include <linux/mutex.h> #include <linux/pm_runtime.h> -#include <linux/netdevice.h> -#include <linux/sched/signal.h> #include <linux/sched/mm.h> +#include <linux/sched/signal.h> +#include <linux/slab.h> +#include <linux/string_helpers.h> +#include <linux/swiotlb.h> #include <linux/sysfs.h> -#include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include "base.h" +#include "physical_location.h" #include "power/power.h" -#ifdef CONFIG_SYSFS_DEPRECATED -#ifdef CONFIG_SYSFS_DEPRECATED_V2 -long sysfs_deprecated = 1; -#else -long sysfs_deprecated = 0; -#endif -static int __init sysfs_deprecated_setup(char *arg) -{ - return kstrtol(arg, 10, &sysfs_deprecated); -} -early_param("sysfs.deprecated", sysfs_deprecated_setup); -#endif - /* Device links support. */ static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); +static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; +static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** - * fwnode_link_add - Create a link between two fwnode_handles. + * __fwnode_link_add - Create a link between two fwnode_handles. * @con: Consumer end of the link. * @sup: Supplier end of the link. + * @flags: Link flags. * * Create a fwnode link between fwnode handles @con and @sup. 
The fwnode link * represents the detail that the firmware lists @sup fwnode as supplying a @@ -70,34 +65,69 @@ static bool fw_devlink_drv_reg_done; * Attempts to create duplicate links between the same pair of fwnode handles * are ignored and there is no reference counting. */ -int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup) +static int __fwnode_link_add(struct fwnode_handle *con, + struct fwnode_handle *sup, u8 flags) { struct fwnode_link *link; - int ret = 0; - - mutex_lock(&fwnode_link_lock); list_for_each_entry(link, &sup->consumers, s_hook) - if (link->consumer == con) - goto out; + if (link->consumer == con) { + link->flags |= flags; + return 0; + } link = kzalloc(sizeof(*link), GFP_KERNEL); - if (!link) { - ret = -ENOMEM; - goto out; - } + if (!link) + return -ENOMEM; link->supplier = sup; INIT_LIST_HEAD(&link->s_hook); link->consumer = con; INIT_LIST_HEAD(&link->c_hook); + link->flags = flags; list_add(&link->s_hook, &sup->consumers); list_add(&link->c_hook, &con->suppliers); -out: - mutex_unlock(&fwnode_link_lock); + pr_debug("%pfwf Linked as a fwnode consumer to %pfwf\n", + con, sup); - return ret; + return 0; +} + +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, + u8 flags) +{ + guard(mutex)(&fwnode_link_lock); + + return __fwnode_link_add(con, sup, flags); +} + +/** + * __fwnode_link_del - Delete a link between two fwnode_handles. + * @link: the fwnode_link to be deleted + * + * The fwnode_link_lock needs to be held when this function is called. + */ +static void __fwnode_link_del(struct fwnode_link *link) +{ + pr_debug("%pfwf Dropping the fwnode link to %pfwf\n", + link->consumer, link->supplier); + list_del(&link->s_hook); + list_del(&link->c_hook); + kfree(link); +} + +/** + * __fwnode_link_cycle - Mark a fwnode link as being part of a cycle. + * @link: the fwnode_link to be marked + * + * The fwnode_link_lock needs to be held when this function is called. + */ +static void __fwnode_link_cycle(struct fwnode_link *link) +{ + pr_debug("%pfwf: cycle: depends on %pfwf\n", + link->consumer, link->supplier); + link->flags |= FWLINK_FLAG_CYCLE; } /** @@ -110,13 +140,10 @@ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; - mutex_lock(&fwnode_link_lock); - list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) { - list_del(&link->s_hook); - list_del(&link->c_hook); - kfree(link); - } - mutex_unlock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + + list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) + __fwnode_link_del(link); } /** @@ -129,13 +156,10 @@ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; - mutex_lock(&fwnode_link_lock); - list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) { - list_del(&link->s_hook); - list_del(&link->c_hook); - kfree(link); - } - mutex_unlock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + + list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) + __fwnode_link_del(link); } /** @@ -166,7 +190,51 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); -#ifdef CONFIG_SRCU +/** + * __fwnode_links_move_consumers - Move consumer from @from to @to fwnode_handle + * @from: move consumers away from this fwnode + * @to: move consumers to this fwnode + * + * Move all consumer links from @from fwnode to @to fwnode. 
+ */ +static void __fwnode_links_move_consumers(struct fwnode_handle *from, + struct fwnode_handle *to) +{ + struct fwnode_link *link, *tmp; + + list_for_each_entry_safe(link, tmp, &from->consumers, s_hook) { + __fwnode_link_add(link->consumer, to, link->flags); + __fwnode_link_del(link); + } +} + +/** + * __fw_devlink_pickup_dangling_consumers - Pick up dangling consumers + * @fwnode: fwnode from which to pick up dangling consumers + * @new_sup: fwnode of new supplier + * + * If the @fwnode has a corresponding struct device and the device supports + * probing (that is, added to a bus), then we want to let fw_devlink create + * MANAGED device links to this device, so leave @fwnode and its descendant's + * fwnode links alone. + * + * Otherwise, move its consumers to the new supplier @new_sup. + */ +static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, + struct fwnode_handle *new_sup) +{ + struct fwnode_handle *child; + + if (fwnode->dev && fwnode->dev->bus) + return; + + fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + __fwnode_links_move_consumers(fwnode, new_sup); + + fwnode_for_each_available_child_node(fwnode, child) + __fw_devlink_pickup_dangling_consumers(child, new_sup); +} + static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); @@ -205,47 +273,6 @@ static void device_link_remove_from_lists(struct device_link *link) list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); } -#else /* !CONFIG_SRCU */ -static DECLARE_RWSEM(device_links_lock); - -static inline void device_links_write_lock(void) -{ - down_write(&device_links_lock); -} - -static inline void device_links_write_unlock(void) -{ - up_write(&device_links_lock); -} - -int device_links_read_lock(void) -{ - down_read(&device_links_lock); - return 0; -} - -void device_links_read_unlock(int not_used) -{ - up_read(&device_links_lock); -} - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int device_links_read_lock_held(void) -{ - return lockdep_is_held(&device_links_lock); -} -#endif - -static inline void device_link_synchronize_removal(void) -{ -} - -static void device_link_remove_from_lists(struct device_link *link) -{ - list_del(&link->s_node); - list_del(&link->c_node); -} -#endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) { @@ -257,6 +284,14 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) +bool device_link_flag_is_sync_state_only(u32 flags) +{ + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; +} + /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. @@ -265,7 +300,7 @@ static bool device_is_ancestor(struct device *dev, struct device *target) * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. 
*/ -int device_is_dependent(struct device *dev, void *target) +static int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; @@ -283,8 +318,7 @@ int device_is_dependent(struct device *dev, void *target) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { - if ((link->flags & ~DL_FLAG_INFERRED) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) + if (device_link_flag_is_sync_state_only(link->flags)) continue; if (link->consumer == target) @@ -357,8 +391,7 @@ static int device_reorder_to_tail(struct device *dev, void *not_used) device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { - if ((link->flags & ~DL_FLAG_INFERRED) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) + if (device_link_flag_is_sync_state_only(link->flags)) continue; device_reorder_to_tail(link->consumer, NULL); } @@ -427,9 +460,9 @@ static ssize_t auto_remove_on_show(struct device *dev, struct device_link *link = to_devlink(dev); const char *output; - if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + if (device_link_test(link, DL_FLAG_AUTOREMOVE_SUPPLIER)) output = "supplier unbind"; - else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) + else if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) output = "consumer unbind"; else output = "never"; @@ -443,7 +476,7 @@ static ssize_t runtime_pm_show(struct device *dev, { struct device_link *link = to_devlink(dev); - return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); + return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); @@ -452,8 +485,7 @@ static ssize_t sync_state_only_show(struct device *dev, { struct device_link *link = to_devlink(dev); - return sysfs_emit(buf, "%d\n", - !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); @@ -473,8 +505,18 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link); + /* + * If supplier_preactivated is set, the link has been dropped between + * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls + * in __driver_probe_device(). In that case, drop the supplier's + * PM-runtime usage counter to remove the reference taken by + * pm_runtime_get_suppliers(). + */ + if (link->supplier_preactivated) + pm_runtime_put_noidle(link->supplier); + + pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); @@ -489,36 +531,39 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } -static struct class devlink_class = { +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. 
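From here on, open-coded link->flags & DL_FLAG_* checks are replaced with device_link_test(). Its definition is not part of this hunk; judging from how it is used, it is presumably just a small flag-test helper along these lines (an assumption, not quoted from the kernel headers):

/* Presumed shape of the helper; treat this as a sketch, not the real definition. */
static inline bool device_link_test(const struct device_link *link, u32 flags)
{
        return !!(link->flags & flags);
}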
+ * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + +static const struct class devlink_class = { .name = "devlink", - .owner = THIS_MODULE, .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; -static int devlink_add_symlinks(struct device *dev, - struct class_interface *class_intf) +static int devlink_add_symlinks(struct device *dev) { + char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; int ret; - size_t len; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; - char *buf; - - len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), - strlen(dev_bus_name(con)) + strlen(dev_name(con))); - len += strlen(":"); - len += strlen("supplier:") + 1; - buf = kzalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) @@ -528,57 +573,64 @@ static int devlink_add_symlinks(struct device *dev, if (ret) goto err_con; - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); + buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); + if (!buf_con) { + ret = -ENOMEM; + goto err_con_dev; + } + + ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf_con); if (ret) goto err_con_dev; - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); + buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + if (!buf_sup) { + ret = -ENOMEM; + goto err_sup_dev; + } + + ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf_sup); if (ret) goto err_sup_dev; goto out; err_sup_dev: - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - sysfs_remove_link(&sup->kobj, buf); + sysfs_remove_link(&sup->kobj, buf_con); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: - kfree(buf); return ret; } -static void devlink_remove_symlinks(struct device *dev, - struct class_interface *class_intf) +static void devlink_remove_symlinks(struct device *dev) { + char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; struct device_link *link = to_devlink(dev); - size_t len; struct device *sup = link->supplier; struct device *con = link->consumer; - char *buf; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); - len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), - strlen(dev_bus_name(con)) + strlen(dev_name(con))); - len += strlen(":"); - len += strlen("supplier:") + 1; - buf = kzalloc(len, GFP_KERNEL); - if (!buf) { - WARN(1, "Unable to properly free device link symlinks!\n"); - return; + if (device_is_registered(con)) { + buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + if (!buf_sup) + goto out; + sysfs_remove_link(&con->kobj, buf_sup); } - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - sysfs_remove_link(&con->kobj, buf); - snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); - sysfs_remove_link(&sup->kobj, buf); - kfree(buf); + buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); + if (!buf_con) 
+ goto out; + sysfs_remove_link(&sup->kobj, buf_con); + + return; + +out: + WARN(1, "Unable to properly free device link symlinks!\n"); } static struct class_interface devlink_class_intf = { @@ -607,7 +659,8 @@ postcore_initcall(devlink_class_init); DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY | \ - DL_FLAG_INFERRED) + DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) @@ -618,6 +671,9 @@ postcore_initcall(devlink_class_init); * @supplier: Supplier end of the link. * @flags: Link flags. * + * Return: On success, a device_link struct will be returned. + * On error or invalid flag settings, NULL will be returned. + * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the @@ -673,10 +729,9 @@ struct device_link *device_link_add(struct device *consumer, { struct device_link *link; - if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || + if (!consumer || !supplier || consumer == supplier || + flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || - (flags & DL_FLAG_SYNC_STATE_ONLY && - (flags & ~DL_FLAG_INFERRED) != DL_FLAG_SYNC_STATE_ONLY) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) @@ -692,6 +747,10 @@ struct device_link *device_link_add(struct device *consumer, if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; + if (flags & DL_FLAG_SYNC_STATE_ONLY && + !device_link_flag_is_sync_state_only(flags)) + return NULL; + device_links_write_lock(); device_pm_lock(); @@ -732,12 +791,12 @@ struct device_link *device_link_add(struct device *consumer, if (link->consumer != consumer) continue; - if (link->flags & DL_FLAG_INFERRED && + if (device_link_test(link, DL_FLAG_INFERRED) && !(flags & DL_FLAG_INFERRED)) link->flags &= ~DL_FLAG_INFERRED; if (flags & DL_FLAG_PM_RUNTIME) { - if (!(link->flags & DL_FLAG_PM_RUNTIME)) { + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } @@ -747,8 +806,8 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); - if (link->flags & DL_FLAG_SYNC_STATE_ONLY && - !(link->flags & DL_FLAG_STATELESS)) { + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && + !device_link_test(link, DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { @@ -763,7 +822,7 @@ struct device_link *device_link_add(struct device *consumer, * update the existing link to stay around longer. 
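device_link_add() now documents its return value explicitly (a struct device_link pointer on success, NULL on error or invalid flags) and rejects consumer == supplier up front. A short consumer-side usage sketch; the flag choice and the way the supplier is obtained are illustrative only:

#include <linux/device.h>

static int example_link_to_supplier(struct device *consumer, struct device *supplier)
{
        struct device_link *link;

        /* Managed link: dropped automatically when the consumer unbinds. */
        link = device_link_add(consumer, supplier,
                               DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_PM_RUNTIME);
        if (!link)      /* NULL covers both allocation errors and bad flag combinations */
                return -EINVAL;

        return 0;
}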
*/ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { - if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { + if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } @@ -771,12 +830,12 @@ struct device_link *device_link_add(struct device *consumer, link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } - if (!(link->flags & DL_FLAG_MANAGED)) { + if (!device_link_test(link, DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } - if (link->flags & DL_FLAG_SYNC_STATE_ONLY && + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; @@ -806,9 +865,7 @@ struct device_link *device_link_add(struct device *consumer, dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { - put_device(consumer); - put_device(supplier); - kfree(link); + put_device(&link->link_dev); link = NULL; goto out; } @@ -882,8 +939,10 @@ static void __device_link_del(struct kref *kref) static void device_link_put_kref(struct device_link *link) { - if (link->flags & DL_FLAG_STATELESS) + if (device_link_test(link, DL_FLAG_STATELESS)) kref_put(&link->kref, __device_link_del); + else if (!device_is_registered(link->consumer)) + __device_link_del(&link->kref); else WARN(1, "Unable to drop a managed device link reference\n"); } @@ -944,12 +1003,34 @@ static void device_links_missing_supplier(struct device *dev) if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { - WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + WARN_ON(!device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } +static bool dev_is_best_effort(struct device *dev) +{ + return (fw_devlink_best_effort && dev->can_match) || + (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); +} + +static struct fwnode_handle *fwnode_links_check_suppliers( + struct fwnode_handle *fwnode) +{ + struct fwnode_link *link; + + if (!fwnode || fw_devlink_is_permissive()) + return NULL; + + list_for_each_entry(link, &fwnode->suppliers, c_hook) + if (!(link->flags & + (FWLINK_FLAG_CYCLE | FWLINK_FLAG_IGNORE))) + return link->supplier; + + return NULL; +} + /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. @@ -969,36 +1050,43 @@ static void device_links_missing_supplier(struct device *dev) int device_links_check_suppliers(struct device *dev) { struct device_link *link; - int ret = 0; + int ret = 0, fwnode_ret = 0; + struct fwnode_handle *sup_fw; /* * Device waiting for supplier to become available is not allowed to * probe. 
*/ - mutex_lock(&fwnode_link_lock); - if (dev->fwnode && !list_empty(&dev->fwnode->suppliers) && - !fw_devlink_is_permissive()) { - dev_dbg(dev, "probe deferral - wait for supplier %pfwP\n", - list_first_entry(&dev->fwnode->suppliers, - struct fwnode_link, - c_hook)->supplier); - mutex_unlock(&fwnode_link_lock); - return -EPROBE_DEFER; + scoped_guard(mutex, &fwnode_link_lock) { + sup_fw = fwnode_links_check_suppliers(dev->fwnode); + if (sup_fw) { + if (dev_is_best_effort(dev)) + fwnode_ret = -EAGAIN; + else + return dev_err_probe(dev, -EPROBE_DEFER, + "wait for supplier %pfwf\n", sup_fw); + } } - mutex_unlock(&fwnode_link_lock); device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && - !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { + !device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) { + + if (dev_is_best_effort(dev) && + device_link_test(link, DL_FLAG_INFERRED) && + !link->supplier->can_match) { + ret = -EAGAIN; + continue; + } + device_links_missing_supplier(dev); - dev_dbg(dev, "probe deferral - supplier %s not ready\n", - dev_name(link->supplier)); - ret = -EPROBE_DEFER; + ret = dev_err_probe(dev, -EPROBE_DEFER, + "supplier %s not ready\n", dev_name(link->supplier)); break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); @@ -1006,7 +1094,8 @@ int device_links_check_suppliers(struct device *dev) dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); - return ret; + + return ret ? ret : fwnode_ret; } /** @@ -1038,7 +1127,7 @@ static void __device_links_queue_sync_state(struct device *dev, return; list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; @@ -1079,10 +1168,7 @@ static void device_links_flush_sync_list(struct list_head *list, if (dev != dont_lock_dev) device_lock(dev); - if (dev->bus->sync_state) - dev->bus->sync_state(dev); - else if (dev->driver && dev->driver->sync_state) - dev->driver->sync_state(dev); + dev_sync_state(dev); if (dev != dont_lock_dev) device_unlock(dev); @@ -1153,7 +1239,8 @@ static ssize_t waiting_for_supplier_show(struct device *dev, bool val; device_lock(dev); - val = !list_empty(&dev->fwnode->suppliers); + scoped_guard(mutex, &fwnode_link_lock) + val = !!fwnode_links_check_suppliers(dev->fwnode); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } @@ -1180,7 +1267,7 @@ void device_links_force_bind(struct device *dev) device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE) { @@ -1216,23 +1303,32 @@ void device_links_driver_bound(struct device *dev) * them. So, fw_devlink no longer needs to create device links to any * of the device's suppliers. * - * Also, if a child firmware node of this bound device is not added as - * a device by now, assume it is never going to be added and make sure - * other devices don't defer probe indefinitely by waiting for such a - * child device. + * Also, if a child firmware node of this bound device is not added as a + * device by now, assume it is never going to be added. 
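device_links_check_suppliers() above now reports probe deferrals through dev_err_probe() instead of a dev_dbg() next to a bare -EPROBE_DEFER, so the reason is also recorded for the deferred-probe diagnostics. The same idiom as it is commonly used in a driver probe path; the "vdd" supply name is made up:

#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>

static int example_probe(struct platform_device *pdev)
{
        struct regulator *vdd = devm_regulator_get(&pdev->dev, "vdd");

        if (IS_ERR(vdd))
                /* Logs at the appropriate level and records the deferral reason. */
                return dev_err_probe(&pdev->dev, PTR_ERR(vdd),
                                     "failed to get vdd supply\n");

        return 0;
}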
Make this bound + * device the fallback supplier to the dangling consumers of the child + * firmware node because this bound device is probably implementing the + * child firmware node functionality and we don't want the dangling + * consumers to defer probe indefinitely waiting for a device for the + * child firmware node. */ if (dev->fwnode && dev->fwnode->dev == dev) { struct fwnode_handle *child; + fwnode_links_purge_suppliers(dev->fwnode); + + guard(mutex)(&fwnode_link_lock); + fwnode_for_each_available_child_node(dev->fwnode, child) - fw_devlink_purge_absent_suppliers(child); + __fw_devlink_pickup_dangling_consumers(child, + dev->fwnode); + __fw_devlink_link_to_consumers(dev); } device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; /* @@ -1248,7 +1344,7 @@ void device_links_driver_bound(struct device *dev) WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); - if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) + if (device_link_test(link, DL_FLAG_AUTOPROBE_CONSUMER)) driver_deferred_probe_add(link->consumer); } @@ -1260,17 +1356,29 @@ void device_links_driver_bound(struct device *dev) list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { struct device *supplier; - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; supplier = link->supplier; - if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { + if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's * save to drop the managed link completely. */ device_link_drop_managed(link); + } else if (dev_is_best_effort(dev) && + device_link_test(link, DL_FLAG_INFERRED) && + link->status != DL_STATE_CONSUMER_PROBE && + !link->supplier->can_match) { + /* + * When dev_is_best_effort() is true, we ignore device + * links to suppliers that don't have a driver. If the + * consumer device still managed to probe, there's no + * point in maintaining a device link in a weird state + * (consumer probed before supplier). So delete it. 
+ */ + device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); @@ -1312,10 +1420,10 @@ static void __device_links_no_driver(struct device *dev) struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; - if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { + if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) { device_link_drop_managed(link); continue; } @@ -1327,7 +1435,7 @@ static void __device_links_no_driver(struct device *dev) if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { - WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); + WARN_ON(!device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } @@ -1352,7 +1460,7 @@ void device_links_no_driver(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; /* @@ -1389,10 +1497,10 @@ void device_links_driver_cleanup(struct device *dev) device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; - WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); + WARN_ON(device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* @@ -1401,7 +1509,7 @@ void device_links_driver_cleanup(struct device *dev) * has moved to DL_STATE_SUPPLIER_UNBIND. */ if (link->status == DL_STATE_SUPPLIER_UNBIND && - link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + device_link_test(link, DL_FLAG_AUTOREMOVE_SUPPLIER)) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); @@ -1435,7 +1543,7 @@ bool device_links_busy(struct device *dev) device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { - if (!(link->flags & DL_FLAG_MANAGED)) + if (!device_link_test(link, DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE @@ -1477,8 +1585,8 @@ void device_links_unbind_consumers(struct device *dev) list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; - if (!(link->flags & DL_FLAG_MANAGED) || - link->flags & DL_FLAG_SYNC_STATE_ONLY) + if (!device_link_test(link, DL_FLAG_MANAGED) || + device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)) continue; status = link->status; @@ -1544,7 +1652,7 @@ static void device_links_purge(struct device *dev) #define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ DL_FLAG_PM_RUNTIME) -static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_ON; +static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; static int __init fw_devlink_setup(char *arg) { if (!arg) @@ -1566,12 +1674,40 @@ early_param("fw_devlink", fw_devlink_setup); static bool fw_devlink_strict; static int __init fw_devlink_strict_setup(char *arg) { - return strtobool(arg, &fw_devlink_strict); + return kstrtobool(arg, &fw_devlink_strict); } early_param("fw_devlink.strict", fw_devlink_strict_setup); -u32 fw_devlink_get_flags(void) +#define FW_DEVLINK_SYNC_STATE_STRICT 0 +#define FW_DEVLINK_SYNC_STATE_TIMEOUT 1 + +#ifndef CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT +static int fw_devlink_sync_state; +#else +static int fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; +#endif + 
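fw_devlink.strict above switches from the deprecated strtobool() to kstrtobool(), and the new fw_devlink.sync_state option (handled just below) follows the usual early_param() pattern. A minimal sketch of a boolean early parameter in that style; "example.feature" is an invented name:

#include <linux/init.h>
#include <linux/kstrtox.h>

static bool example_feature_enabled;

/* Parses "example.feature=0/1/on/off/..." from the kernel command line. */
static int __init example_feature_setup(char *arg)
{
        if (!arg)
                return -EINVAL;
        return kstrtobool(arg, &example_feature_enabled);
}
early_param("example.feature", example_feature_setup);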
+static int __init fw_devlink_sync_state_setup(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "strict") == 0) { + fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_STRICT; + return 0; + } else if (strcmp(arg, "timeout") == 0) { + fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; + return 0; + } + return -EINVAL; +} +early_param("fw_devlink.sync_state", fw_devlink_sync_state_setup); + +static inline u32 fw_devlink_get_flags(u8 fwlink_flags) { + if (fwlink_flags & FWLINK_FLAG_CYCLE) + return FW_DEVLINK_FLAGS_PERMISSIVE | DL_FLAG_CYCLE; + return fw_devlink_flags; } @@ -1606,10 +1742,10 @@ static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) static void fw_devlink_relax_link(struct device_link *link) { - if (!(link->flags & DL_FLAG_INFERRED)) + if (!device_link_test(link, DL_FLAG_INFERRED)) return; - if (link->flags == (DL_FLAG_MANAGED | FW_DEVLINK_FLAGS_PERMISSIVE)) + if (device_link_flag_is_sync_state_only(link->flags)) return; pm_runtime_drop_link(link); @@ -1637,6 +1773,100 @@ void fw_devlink_drivers_done(void) device_links_write_unlock(); } +static int fw_devlink_dev_sync_state(struct device *dev, void *data) +{ + struct device_link *link = to_devlink(dev); + struct device *sup = link->supplier; + + if (!device_link_test(link, DL_FLAG_MANAGED) || + link->status == DL_STATE_ACTIVE || sup->state_synced || + !dev_has_sync_state(sup)) + return 0; + + if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { + dev_info(sup, "sync_state() pending due to %s\n", + dev_name(link->consumer)); + return 0; + } + + if (!list_empty(&sup->links.defer_sync)) + return 0; + + dev_warn(sup, "Timed out. Forcing sync_state()\n"); + sup->state_synced = true; + get_device(sup); + list_add_tail(&sup->links.defer_sync, data); + + return 0; +} + +void fw_devlink_probing_done(void) +{ + LIST_HEAD(sync_list); + + device_links_write_lock(); + class_for_each_device(&devlink_class, NULL, &sync_list, + fw_devlink_dev_sync_state); + device_links_write_unlock(); + device_links_flush_sync_list(&sync_list, NULL); +} + +/** + * wait_for_init_devices_probe - Try to probe any device needed for init + * + * Some devices might need to be probed and bound successfully before the kernel + * boot sequence can finish and move on to init/userspace. For example, a + * network interface might need to be bound to be able to mount a NFS rootfs. + * + * With fw_devlink=on by default, some of these devices might be blocked from + * probing because they are waiting on a optional supplier that doesn't have a + * driver. While fw_devlink will eventually identify such devices and unblock + * the probing automatically, it might be too late by the time it unblocks the + * probing of devices. For example, the IP4 autoconfig might timeout before + * fw_devlink unblocks probing of the network interface. + * + * This function is available to temporarily try and probe all devices that have + * a driver even if some of their suppliers haven't been added or don't have + * drivers. + * + * The drivers can then decide which of the suppliers are optional vs mandatory + * and probe the device if possible. By the time this function returns, all such + * "best effort" probes are guaranteed to be completed. If a device successfully + * probes in this mode, we delete all fw_devlink discovered dependencies of that + * device where the supplier hasn't yet probed successfully because they have to + * be optional dependencies. 
+ * + * Any devices that didn't successfully probe go back to being treated as if + * this function was never called. + * + * This also means that some devices that aren't needed for init and could have + * waited for their optional supplier to probe (when the supplier's module is + * loaded later on) would end up probing prematurely with limited functionality. + * So call this function only when boot would fail without it. + */ +void __init wait_for_init_devices_probe(void) +{ + if (!fw_devlink_flags || fw_devlink_is_permissive()) + return; + + /* + * Wait for all ongoing probes to finish so that the "best effort" is + * only applied to devices that can't probe otherwise. + */ + wait_for_device_probe(); + + pr_info("Trying to probe devices needed for running init ...\n"); + fw_devlink_best_effort = true; + driver_deferred_probe_trigger(); + + /* + * Wait for all "best effort" probes to finish before going back to + * normal enforcement. + */ + wait_for_device_probe(); + fw_devlink_best_effort = false; +} + static void fw_devlink_unblock_consumers(struct device *dev) { struct device_link *link; @@ -1650,44 +1880,204 @@ static void fw_devlink_unblock_consumers(struct device *dev) device_links_write_unlock(); } +static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) +{ + struct device *dev; + bool ret; + + if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED)) + return false; + + dev = get_dev_from_fwnode(fwnode); + ret = !dev || dev->links.status == DL_DEV_NO_DRIVER; + put_device(dev); + + return ret; +} + +static bool fwnode_ancestor_init_without_drv(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *parent; + + fwnode_for_each_parent_node(fwnode, parent) { + if (fwnode_init_without_drv(parent)) { + fwnode_handle_put(parent); + return true; + } + } + + return false; +} + /** - * fw_devlink_relax_cycle - Convert cyclic links to SYNC_STATE_ONLY links - * @con: Device to check dependencies for. - * @sup: Device to check against. - * - * Check if @sup depends on @con or any device dependent on it (its child or - * its consumer etc). When such a cyclic dependency is found, convert all - * device links created solely by fw_devlink into SYNC_STATE_ONLY device links. - * This is the equivalent of doing fw_devlink=permissive just between the - * devices in the cycle. We need to do this because, at this point, fw_devlink - * can't tell which of these dependencies is not a real dependency. - * - * Return 1 if a cycle is found. Otherwise, return 0. + * fwnode_is_ancestor_of - Test if @ancestor is ancestor of @child + * @ancestor: Firmware which is tested for being an ancestor + * @child: Firmware which is tested for being the child + * + * A node is considered an ancestor of itself too. + * + * Return: true if @ancestor is an ancestor of @child. Otherwise, returns false. 
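wait_for_init_devices_probe() is meant for boot-critical callers such as mounting a network rootfs, per its kerneldoc. A hedged sketch of how an init-time caller might use it; example_boot_device_ready() is hypothetical and only stands in for whatever readiness check the caller already has:

#include <linux/device.h>
#include <linux/errno.h>

static bool example_boot_device_ready(void);    /* hypothetical readiness check */

static int __init example_prepare_boot_device(void)
{
        if (example_boot_device_ready())
                return 0;

        /* Best-effort probe of everything that has a driver, then re-check. */
        wait_for_init_devices_probe();

        return example_boot_device_ready() ? 0 : -ENODEV;
}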
*/ -static int fw_devlink_relax_cycle(struct device *con, void *sup) +static bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, + const struct fwnode_handle *child) { - struct device_link *link; - int ret; + struct fwnode_handle *parent; - if (con == sup) - return 1; + if (IS_ERR_OR_NULL(ancestor)) + return false; - ret = device_for_each_child(con, sup, fw_devlink_relax_cycle); - if (ret) - return ret; + if (child == ancestor) + return true; - list_for_each_entry(link, &con->links.consumers, s_node) { - if ((link->flags & ~DL_FLAG_INFERRED) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED)) - continue; + fwnode_for_each_parent_node(child, parent) { + if (parent == ancestor) { + fwnode_handle_put(parent); + return true; + } + } + return false; +} - if (!fw_devlink_relax_cycle(link->consumer, sup)) +/** + * fwnode_get_next_parent_dev - Find device of closest ancestor fwnode + * @fwnode: firmware node + * + * Given a firmware node (@fwnode), this function finds its closest ancestor + * firmware node that has a corresponding struct device and returns that struct + * device. + * + * The caller is responsible for calling put_device() on the returned device + * pointer. + * + * Return: a pointer to the device of the @fwnode's closest ancestor. + */ +static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode) +{ + struct fwnode_handle *parent; + struct device *dev; + + fwnode_for_each_parent_node(fwnode, parent) { + dev = get_dev_from_fwnode(parent); + if (dev) { + fwnode_handle_put(parent); + return dev; + } + } + return NULL; +} + +/** + * __fw_devlink_relax_cycles - Relax and mark dependency cycles. + * @con_handle: Potential consumer device fwnode. + * @sup_handle: Potential supplier's fwnode. + * + * Needs to be called with fwnode_lock and device link lock held. + * + * Check if @sup_handle or any of its ancestors or suppliers direct/indirectly + * depend on @con. This function can detect multiple cyles between @sup_handle + * and @con. When such dependency cycles are found, convert all device links + * created solely by fw_devlink into SYNC_STATE_ONLY device links. Also, mark + * all fwnode links in the cycle with FWLINK_FLAG_CYCLE so that when they are + * converted into a device link in the future, they are created as + * SYNC_STATE_ONLY device links. This is the equivalent of doing + * fw_devlink=permissive just between the devices in the cycle. We need to do + * this because, at this point, fw_devlink can't tell which of these + * dependencies is not a real dependency. + * + * Return true if one or more cycles were found. Otherwise, return false. + */ +static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, + struct fwnode_handle *sup_handle) +{ + struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL; + struct fwnode_link *link; + struct device_link *dev_link; + bool ret = false; + + if (!sup_handle) + return false; + + /* + * We aren't trying to find all cycles. Just a cycle between con and + * sup_handle. + */ + if (sup_handle->flags & FWNODE_FLAG_VISITED) + return false; + + sup_handle->flags |= FWNODE_FLAG_VISITED; + + /* Termination condition. */ + if (sup_handle == con_handle) { + pr_debug("----- cycle: start -----\n"); + ret = true; + goto out; + } + + sup_dev = get_dev_from_fwnode(sup_handle); + con_dev = get_dev_from_fwnode(con_handle); + /* + * If sup_dev is bound to a driver and @con hasn't started binding to a + * driver, sup_dev can't be a consumer of @con. So, no need to check + * further. 
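fwnode_is_ancestor_of() and fwnode_get_next_parent_dev() above both walk the ancestor chain with fwnode_for_each_parent_node(), which hands out a reference on each parent; breaking out of the loop early therefore requires a fwnode_handle_put(), as both helpers do. A small sketch of the same walk when the loop runs to completion (the counting is illustrative only):

#include <linux/property.h>

/* Counts the ancestors of @fwnode; no put is needed when the loop finishes. */
static unsigned int example_count_ancestors(const struct fwnode_handle *fwnode)
{
        struct fwnode_handle *parent;
        unsigned int n = 0;

        fwnode_for_each_parent_node(fwnode, parent)
                n++;    /* the iterator drops each parent reference as it advances */

        return n;
}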
+ */ + if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND && + con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) { + ret = false; + goto out; + } + + list_for_each_entry(link, &sup_handle->suppliers, c_hook) { + if (link->flags & FWLINK_FLAG_IGNORE) continue; - ret = 1; + if (__fw_devlink_relax_cycles(con_handle, link->supplier)) { + __fwnode_link_cycle(link); + ret = true; + } + } - fw_devlink_relax_link(link); + /* + * Give priority to device parent over fwnode parent to account for any + * quirks in how fwnodes are converted to devices. + */ + if (sup_dev) + par_dev = get_device(sup_dev->parent); + else + par_dev = fwnode_get_next_parent_dev(sup_handle); + + if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) { + pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, + par_dev->fwnode); + ret = true; } + + if (!sup_dev) + goto out; + + list_for_each_entry(dev_link, &sup_dev->links.suppliers, c_node) { + /* + * Ignore a SYNC_STATE_ONLY flag only if it wasn't marked as + * such due to a cycle. + */ + if (device_link_flag_is_sync_state_only(dev_link->flags) && + !device_link_test(dev_link, DL_FLAG_CYCLE)) + continue; + + if (__fw_devlink_relax_cycles(con_handle, + dev_link->supplier->fwnode)) { + pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, + dev_link->supplier->fwnode); + fw_devlink_relax_link(dev_link); + dev_link->flags |= DL_FLAG_CYCLE; + ret = true; + } + } + +out: + sup_handle->flags &= ~FWNODE_FLAG_VISITED; + put_device(sup_dev); + put_device(con_dev); + put_device(par_dev); return ret; } @@ -1695,7 +2085,7 @@ static int fw_devlink_relax_cycle(struct device *con, void *sup) * fw_devlink_create_devlink - Create a device link from a consumer to fwnode * @con: consumer device for the device link * @sup_handle: fwnode handle of supplier - * @flags: devlink flags + * @link: fwnode link that's being converted to a device link * * This function will try to create a device link between the consumer device * @con and the supplier device represented by @sup_handle. @@ -1712,12 +2102,59 @@ static int fw_devlink_relax_cycle(struct device *con, void *sup) * possible to do that in the future */ static int fw_devlink_create_devlink(struct device *con, - struct fwnode_handle *sup_handle, u32 flags) + struct fwnode_handle *sup_handle, + struct fwnode_link *link) { struct device *sup_dev; int ret = 0; + u32 flags; + + if (link->flags & FWLINK_FLAG_IGNORE) + return 0; + + /* + * In some cases, a device P might also be a supplier to its child node + * C. However, this would defer the probe of C until the probe of P + * completes successfully. This is perfectly fine in the device driver + * model. device_add() doesn't guarantee probe completion of the device + * by the time it returns. + * + * However, there are a few drivers that assume C will finish probing + * as soon as it's added and before P finishes probing. So, we provide + * a flag to let fw_devlink know not to delay the probe of C until the + * probe of P completes successfully. + * + * When such a flag is set, we can't create device links where P is the + * supplier of C as that would delay the probe of C. + */ + if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && + fwnode_is_ancestor_of(sup_handle, con->fwnode)) + return -EINVAL; + + /* + * Don't try to optimize by not calling the cycle detection logic under + * certain conditions. There's always some corner case that won't get + * detected. 
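__fw_devlink_relax_cycles() above is a depth-first search: it marks each fwnode with FWNODE_FLAG_VISITED on the way in (only to avoid revisiting nodes within one walk), recurses over fwnode suppliers, the parent, and existing device links, and clears the flag on the way out. Stripped of the devlink specifics, the control flow is roughly the following; every name here is invented, none of it is kernel API:

struct example_node {
        bool visited;
        unsigned int nr_deps;
        struct example_node **deps;
};

/* Returns true if @target is reachable from @node, i.e. a cycle back to it. */
static bool example_reaches(struct example_node *node, struct example_node *target)
{
        bool ret = false;
        unsigned int i;

        if (!node || node->visited)
                return false;
        node->visited = true;

        if (node == target) {
                ret = true;
        } else {
                for (i = 0; i < node->nr_deps; i++)
                        if (example_reaches(node->deps[i], target))
                                ret = true;     /* keep walking so every edge on a cycle is seen */
        }

        node->visited = false;  /* the flag only guards against revisits within this walk */
        return ret;
}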
+ */ + device_links_write_lock(); + if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) { + __fwnode_link_cycle(link); + pr_debug("----- cycle: end -----\n"); + pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n", + link->consumer, sup_handle); + } + device_links_write_unlock(); + + if (con->fwnode == link->consumer) + flags = fw_devlink_get_flags(link->flags); + else + flags = FW_DEVLINK_FLAGS_PERMISSIVE; + + if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) + sup_dev = fwnode_get_next_parent_dev(sup_handle); + else + sup_dev = get_dev_from_fwnode(sup_handle); - sup_dev = get_dev_from_fwnode(sup_handle); if (sup_dev) { /* * If it's one of those drivers that don't actually bind to @@ -1726,64 +2163,34 @@ static int fw_devlink_create_devlink(struct device *con, */ if (sup_dev->links.status == DL_DEV_NO_DRIVER && sup_handle->flags & FWNODE_FLAG_INITIALIZED) { + dev_dbg(con, + "Not linking %pfwf - dev might never probe\n", + sup_handle); ret = -EINVAL; goto out; } - /* - * If this fails, it is due to cycles in device links. Just - * give up on this link and treat it as invalid. - */ - if (!device_link_add(con, sup_dev, flags) && - !(flags & DL_FLAG_SYNC_STATE_ONLY)) { - dev_info(con, "Fixing up cyclic dependency with %s\n", - dev_name(sup_dev)); - device_links_write_lock(); - fw_devlink_relax_cycle(con, sup_dev); - device_links_write_unlock(); - device_link_add(con, sup_dev, - FW_DEVLINK_FLAGS_PERMISSIVE); + if (con != sup_dev && !device_link_add(con, sup_dev, flags)) { + dev_err(con, "Failed to create device link (0x%x) with supplier %s for %pfwf\n", + flags, dev_name(sup_dev), link->consumer); ret = -EINVAL; } goto out; } - /* Supplier that's already initialized without a struct device. */ - if (sup_handle->flags & FWNODE_FLAG_INITIALIZED) - return -EINVAL; - /* - * DL_FLAG_SYNC_STATE_ONLY doesn't block probing and supports - * cycles. So cycle detection isn't necessary and shouldn't be - * done. + * Supplier or supplier's ancestor already initialized without a struct + * device or being probed by a driver. */ - if (flags & DL_FLAG_SYNC_STATE_ONLY) - return -EAGAIN; - - /* - * If we can't find the supplier device from its fwnode, it might be - * due to a cyclic dependency between fwnodes. Some of these cycles can - * be broken by applying logic. Check for these types of cycles and - * break them so that devices in the cycle probe properly. - * - * If the supplier's parent is dependent on the consumer, then - * the consumer-supplier dependency is a false dependency. So, - * treat it as an invalid link. - */ - sup_dev = fwnode_get_next_parent_dev(sup_handle); - if (sup_dev && device_is_dependent(con, sup_dev)) { - dev_dbg(con, "Not linking to %pfwP - False link\n", + if (fwnode_init_without_drv(sup_handle) || + fwnode_ancestor_init_without_drv(sup_handle)) { + dev_dbg(con, "Not linking %pfwf - might never become dev\n", sup_handle); - ret = -EINVAL; - } else { - /* - * Can't check for cycles or no cycles. So let's try - * again later. 
- */ - ret = -EAGAIN; + return -EINVAL; } + ret = -EAGAIN; out: put_device(sup_dev); return ret; @@ -1811,7 +2218,6 @@ static void __fw_devlink_link_to_consumers(struct device *dev) struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) { - u32 dl_flags = fw_devlink_get_flags(); struct device *con_dev; bool own_link = true; int ret; @@ -1841,21 +2247,18 @@ static void __fw_devlink_link_to_consumers(struct device *dev) con_dev = NULL; } else { own_link = false; - dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE; } } if (!con_dev) continue; - ret = fw_devlink_create_devlink(con_dev, fwnode, dl_flags); + ret = fw_devlink_create_devlink(con_dev, fwnode, link); put_device(con_dev); if (!own_link || ret == -EAGAIN) continue; - list_del(&link->s_hook); - list_del(&link->c_hook); - kfree(link); + __fwnode_link_del(link); } } @@ -1870,10 +2273,7 @@ static void __fw_devlink_link_to_consumers(struct device *dev) * * The function creates normal (non-SYNC_STATE_ONLY) device links between @dev * and the real suppliers of @dev. Once these device links are created, the - * fwnode links are deleted. When such device links are successfully created, - * this function is called recursively on those supplier devices. This is - * needed to detect and break some invalid cycles in fwnode links. See - * fw_devlink_create_devlink() for more details. + * fwnode links are deleted. * * In addition, it also looks at all the suppliers of the entire fwnode tree * because some of the child devices of @dev that have not been added yet @@ -1891,46 +2291,16 @@ static void __fw_devlink_link_to_suppliers(struct device *dev, bool own_link = (dev->fwnode == fwnode); struct fwnode_link *link, *tmp; struct fwnode_handle *child = NULL; - u32 dl_flags; - - if (own_link) - dl_flags = fw_devlink_get_flags(); - else - dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE; list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) { int ret; - struct device *sup_dev; struct fwnode_handle *sup = link->supplier; - ret = fw_devlink_create_devlink(dev, sup, dl_flags); + ret = fw_devlink_create_devlink(dev, sup, link); if (!own_link || ret == -EAGAIN) continue; - list_del(&link->s_hook); - list_del(&link->c_hook); - kfree(link); - - /* If no device link was created, nothing more to do. */ - if (ret) - continue; - - /* - * If a device link was successfully created to a supplier, we - * now need to try and link the supplier to all its suppliers. - * - * This is needed to detect and delete false dependencies in - * fwnode links that haven't been converted to a device link - * yet. See comments in fw_devlink_create_devlink() for more - * details on the false dependency. - * - * Without deleting these false dependencies, some devices will - * never probe because they'll keep waiting for their false - * dependency fwnode links to be converted to device links. - */ - sup_dev = get_dev_from_fwnode(sup); - __fw_devlink_link_to_suppliers(sup_dev, sup_dev->fwnode); - put_device(sup_dev); + __fwnode_link_del(link); } /* @@ -1952,19 +2322,21 @@ static void fw_devlink_link_device(struct device *dev) fw_devlink_parse_fwtree(fwnode); - mutex_lock(&fwnode_link_lock); + guard(mutex)(&fwnode_link_lock); + __fw_devlink_link_to_consumers(dev); __fw_devlink_link_to_suppliers(dev, fwnode); - mutex_unlock(&fwnode_link_lock); } /* Device links support end. 
*/ -int (*platform_notify)(struct device *dev) = NULL; -int (*platform_notify_remove)(struct device *dev) = NULL; static struct kobject *dev_kobj; -struct kobject *sysfs_dev_char_kobj; -struct kobject *sysfs_dev_block_kobj; + +/* /sys/dev/char */ +static struct kobject *sysfs_dev_char_kobj; + +/* /sys/dev/block */ +static struct kobject *sysfs_dev_block_kobj; static DEFINE_MUTEX(device_hotplug_lock); @@ -2000,24 +2372,18 @@ static inline int device_is_not_partition(struct device *dev) } #endif -static int -device_platform_notify(struct device *dev, enum kobject_action action) +static void device_platform_notify(struct device *dev) { - int ret; + acpi_device_notify(dev); - ret = acpi_platform_notify(dev, action); - if (ret) - return ret; + software_node_notify(dev); +} - ret = software_node_notify(dev, action); - if (ret) - return ret; +static void device_platform_notify_remove(struct device *dev) +{ + software_node_notify_remove(dev); - if (platform_notify && action == KOBJ_ADD) - platform_notify(dev); - else if (platform_notify_remove && action == KOBJ_REMOVE) - platform_notify_remove(dev); - return 0; + acpi_device_notify_remove(dev); } /** @@ -2140,7 +2506,7 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, { struct dev_ext_attribute *ea = to_ext_attr(attr); - if (strtobool(buf, ea->var) < 0) + if (kstrtobool(buf, ea->var) < 0) return -EINVAL; return size; @@ -2156,6 +2522,15 @@ ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, } EXPORT_SYMBOL_GPL(device_show_bool); +ssize_t device_show_string(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *ea = to_ext_attr(attr); + + return sysfs_emit(buf, "%s\n", (char *)ea->var); +} +EXPORT_SYMBOL_GPL(device_show_string); + /** * device_release - free device structure. * @kobj: device's kobject. 
@@ -2194,26 +2569,26 @@ static void device_release(struct kobject *kobj) kfree(p); } -static const void *device_namespace(struct kobject *kobj) +static const void *device_namespace(const struct kobject *kobj) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; - if (dev->class && dev->class->ns_type) + if (dev->class && dev->class->namespace) ns = dev->class->namespace(dev); return ns; } -static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); } -static struct kobj_type device_ktype = { +static const struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, .namespace = device_namespace, @@ -2221,12 +2596,12 @@ static struct kobj_type device_ktype = { }; -static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) +static int dev_uevent_filter(const struct kobject *kobj) { - struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) @@ -2235,9 +2610,9 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) +static const char *dev_uevent_name(const struct kobject *kobj) { - struct device *dev = kobj_to_dev(kobj); + const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; @@ -2246,10 +2621,38 @@ static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) return NULL; } -static int dev_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +/* + * Try filling "DRIVER=<name>" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe, at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyways). Unbinding is more + * risky as driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that driver structure is not going to disappear from under us + * we take bus' drivers klist lock. The assumption that only registered + * driver can be bound to a device, and to unregister a driver bus code + * will take the same lock. 
+ */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct device *dev = kobj_to_dev(kobj); + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + +static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) +{ + const struct device *dev = kobj_to_dev(kobj); int retval = 0; /* add device node properties if present */ @@ -2278,8 +2681,8 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2342,7 +2745,7 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) - if (!kset->uevent_ops->filter(kset, &dev->kobj)) + if (!kset->uevent_ops->filter(&dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); @@ -2350,7 +2753,7 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, return -ENOMEM; /* let the kset specific function add its keys */ - retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); + retval = kset->uevent_ops->uevent(&dev->kobj, env); if (retval) goto out; @@ -2370,7 +2773,7 @@ static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, rc = kobject_synth_uevent(&dev->kobj, buf, count); if (rc) { - dev_err(dev, "uevent: failed to send synthetic uevent\n"); + dev_err(dev, "uevent: failed to send synthetic uevent: %d\n", rc); return rc; } @@ -2395,7 +2798,7 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr, bool val; int ret; - ret = strtobool(buf, &val); + ret = kstrtobool(buf, &val); if (ret < 0) return ret; @@ -2446,11 +2849,6 @@ union device_attr_group_devres { const struct attribute_group **groups; }; -static int devm_attr_group_match(struct device *dev, void *res, void *data) -{ - return ((union device_attr_group_devres *)res)->group == data; -} - static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; @@ -2460,15 +2858,6 @@ static void devm_attr_group_remove(struct device *dev, void *res) sysfs_remove_group(&dev->kobj, group); } -static void devm_attr_groups_remove(struct device *dev, void *res) -{ - union device_attr_group_devres *devres = res; - const struct attribute_group **groups = devres->groups; - - dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); - sysfs_remove_groups(&dev->kobj, groups); -} - /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for @@ -2501,79 +2890,9 @@ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) } EXPORT_SYMBOL_GPL(devm_device_add_group); -/** - * devm_device_remove_group: remove a managed group from a device - * @dev: device to remove the group from - * @grp: group to remove - * - * This function removes a group of attributes from a device. The attributes - * previously have to have been created for this group, otherwise it will fail. 
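The managed sysfs-group helpers devm_device_remove_group(), devm_device_add_groups() and devm_device_remove_groups() are deleted by this patch (the removals continue just below this note), while devm_device_add_group() remains. A short sketch of the surviving helper; the attribute and its value are made up:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
{
        return sysfs_emit(buf, "%d\n", 42);     /* illustrative value */
}
static DEVICE_ATTR_RO(example);

static struct attribute *example_attrs[] = {
        &dev_attr_example.attr,
        NULL
};
static const struct attribute_group example_group = {
        .attrs = example_attrs,
};

static int example_add_sysfs(struct device *dev)
{
        /* The group is removed automatically when the device's devres unwinds. */
        return devm_device_add_group(dev, &example_group);
}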
- */ -void devm_device_remove_group(struct device *dev, - const struct attribute_group *grp) -{ - WARN_ON(devres_release(dev, devm_attr_group_remove, - devm_attr_group_match, - /* cast away const */ (void *)grp)); -} -EXPORT_SYMBOL_GPL(devm_device_remove_group); - -/** - * devm_device_add_groups - create a bunch of managed attribute groups - * @dev: The device to create the group for - * @groups: The attribute groups to create, NULL terminated - * - * This function creates a bunch of managed attribute groups. If an error - * occurs when creating a group, all previously created groups will be - * removed, unwinding everything back to the original state when this - * function was called. It will explicitly warn and error if any of the - * attribute files being created already exist. - * - * Returns 0 on success or error code from sysfs_create_group on failure. - */ -int devm_device_add_groups(struct device *dev, - const struct attribute_group **groups) -{ - union device_attr_group_devres *devres; - int error; - - devres = devres_alloc(devm_attr_groups_remove, - sizeof(*devres), GFP_KERNEL); - if (!devres) - return -ENOMEM; - - error = sysfs_create_groups(&dev->kobj, groups); - if (error) { - devres_free(devres); - return error; - } - - devres->groups = groups; - devres_add(dev, devres); - return 0; -} -EXPORT_SYMBOL_GPL(devm_device_add_groups); - -/** - * devm_device_remove_groups - remove a list of managed groups - * - * @dev: The device for the groups to be removed from - * @groups: NULL terminated list of groups to be removed - * - * If groups is not NULL, remove the specified groups from the device. - */ -void devm_device_remove_groups(struct device *dev, - const struct attribute_group **groups) -{ - WARN_ON(devres_release(dev, devm_attr_groups_remove, - devm_attr_group_match, - /* cast away const */ (void *)groups)); -} -EXPORT_SYMBOL_GPL(devm_device_remove_groups); - static int device_add_attrs(struct device *dev) { - struct class *class = dev->class; + const struct class *class = dev->class; const struct device_type *type = dev->type; int error; @@ -2611,8 +2930,17 @@ static int device_add_attrs(struct device *dev) goto err_remove_dev_waiting_for_supplier; } + if (dev_add_physical_location(dev)) { + error = device_add_group(dev, + &dev_attr_physical_location_group); + if (error) + goto err_remove_dev_removable; + } + return 0; + err_remove_dev_removable: + device_remove_file(dev, &dev_attr_removable); err_remove_dev_waiting_for_supplier: device_remove_file(dev, &dev_attr_waiting_for_supplier); err_remove_dev_online: @@ -2631,9 +2959,14 @@ static int device_add_attrs(struct device *dev) static void device_remove_attrs(struct device *dev) { - struct class *class = dev->class; + const struct class *class = dev->class; const struct device_type *type = dev->type; + if (dev->physical_location) { + device_remove_group(dev, &dev_attr_physical_location_group); + kfree(dev->physical_location); + } + device_remove_file(dev, &dev_attr_removable); device_remove_file(dev, &dev_attr_waiting_for_supplier); device_remove_file(dev, &dev_attr_online); @@ -2826,17 +3159,11 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); - set_dev_node(dev, -1); -#ifdef CONFIG_GENERIC_MSI_IRQ - 
INIT_LIST_HEAD(&dev->msi_list); -#endif + set_dev_node(dev, NUMA_NO_NODE); INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.defer_sync); @@ -2846,10 +3173,11 @@ void device_initialize(struct device *dev) defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) dev->dma_coherent = dma_default_coherent; #endif + swiotlb_dev_init(dev); } EXPORT_SYMBOL_GPL(device_initialize); -struct kobject *virtual_device_parent(struct device *dev) +struct kobject *virtual_device_parent(void) { static struct kobject *virtual_dir = NULL; @@ -2862,7 +3190,7 @@ struct kobject *virtual_device_parent(struct device *dev) struct class_dir { struct kobject kobj; - struct class *class; + const struct class *class; }; #define to_class_dir(obj) container_of(obj, struct class_dir, kobj) @@ -2874,20 +3202,20 @@ static void class_dir_release(struct kobject *kobj) } static const -struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) +struct kobj_ns_type_operations *class_dir_child_ns_type(const struct kobject *kobj) { - struct class_dir *dir = to_class_dir(kobj); + const struct class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } -static struct kobj_type class_dir_ktype = { +static const struct kobj_type class_dir_ktype = { .release = class_dir_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = class_dir_child_ns_type }; -static struct kobject * -class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) +static struct kobject *class_dir_create_and_add(struct subsys_private *sp, + struct kobject *parent_kobj) { struct class_dir *dir; int retval; @@ -2896,12 +3224,12 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) if (!dir) return ERR_PTR(-ENOMEM); - dir->class = class; + dir->class = sp->class; kobject_init(&dir->kobj, &class_dir_ktype); - dir->kobj.kset = &class->p->glue_dirs; + dir->kobj.kset = &sp->glue_dirs; - retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); + retval = kobject_add(&dir->kobj, parent_kobj, "%s", sp->class->name); if (retval < 0) { kobject_put(&dir->kobj); return ERR_PTR(retval); @@ -2914,57 +3242,61 @@ static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { - if (dev->class) { - struct kobject *kobj = NULL; + struct subsys_private *sp = class_to_subsys(dev->class); + struct kobject *kobj = NULL; + + if (sp) { struct kobject *parent_kobj; struct kobject *k; -#ifdef CONFIG_BLOCK - /* block disks show up in /sys/block */ - if (sysfs_deprecated && dev->class == &block_class) { - if (parent && parent->class == &block_class) - return &parent->kobj; - return &block_class.p->subsys.kobj; - } -#endif - /* * If we have no parent, we live in "virtual". * Class-devices with a non class-device as parent, live * in a "glue" directory to prevent namespace collisions. 
*/ if (parent == NULL) - parent_kobj = virtual_device_parent(dev); - else if (parent->class && !dev->class->ns_type) + parent_kobj = virtual_device_parent(); + else if (parent->class && !dev->class->ns_type) { + subsys_put(sp); return &parent->kobj; - else + } else { parent_kobj = &parent->kobj; + } mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ - spin_lock(&dev->class->p->glue_dirs.list_lock); - list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) + spin_lock(&sp->glue_dirs.list_lock); + list_for_each_entry(k, &sp->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } - spin_unlock(&dev->class->p->glue_dirs.list_lock); + spin_unlock(&sp->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); + subsys_put(sp); return kobj; } /* or create a new class-directory at the parent device */ - k = class_dir_create_and_add(dev->class, parent_kobj); + k = class_dir_create_and_add(sp, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); + subsys_put(sp); return k; } /* subsystems can specify a default root directory for their devices */ - if (!parent && dev->bus && dev->bus->dev_root) - return &dev->bus->dev_root->kobj; + if (!parent && dev->bus) { + struct device *dev_root = bus_get_dev_root(dev->bus); + + if (dev_root) { + kobj = &dev_root->kobj; + put_device(dev_root); + return kobj; + } + } if (parent) return &parent->kobj; @@ -2974,10 +3306,23 @@ static struct kobject *get_device_parent(struct device *dev, static inline bool live_in_glue_dir(struct kobject *kobj, struct device *dev) { - if (!kobj || !dev->class || - kobj->kset != &dev->class->p->glue_dirs) + struct subsys_private *sp; + bool retval; + + if (!kobj || !dev->class) return false; - return true; + + sp = class_to_subsys(dev->class); + if (!sp) + return false; + + if (kobj->kset == &sp->glue_dirs) + retval = true; + else + retval = false; + + subsys_put(sp); + return retval; } static inline struct kobject *get_glue_dir(struct device *dev) @@ -2985,6 +3330,23 @@ static inline struct kobject *get_glue_dir(struct device *dev) return dev->kobj.parent; } +/** + * kobject_has_children - Returns whether a kobject has children. + * @kobj: the object to test + * + * This will return whether a kobject has other kobjects as children. + * + * It does NOT account for the presence of attribute files, only sub + * directories. It also assumes there is no concurrent addition or + * removal of such children, and thus relies on external locking. 
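get_device_parent() and live_in_glue_dir() above no longer reach into dev->class->p directly; they look the class up with class_to_subsys(), which returns a referenced struct subsys_private (or NULL), and drop it with subsys_put(). Both are internal drivers/base helpers, so the sketch below only illustrates the get/use/put shape as this file uses it:

/* Internal driver-core pattern; class_to_subsys()/subsys_put() live in drivers/base/base.h. */
static int example_with_class(struct device *dev)
{
        struct subsys_private *sp = class_to_subsys(dev->class);

        if (!sp)
                return -ENODEV;         /* no class, or class not registered */

        /* ... use sp->subsys.kobj, sp->glue_dirs, ... */

        subsys_put(sp);
        return 0;
}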
+ */ +static inline bool kobject_has_children(struct kobject *kobj) +{ + WARN_ON_ONCE(kref_read(&kobj->kref) == 0); + + return kobj->sd && kobj->sd->dir.subdirs; +} + /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the @@ -3057,6 +3419,7 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); + struct subsys_private *sp; int error; if (of_node) { @@ -3066,12 +3429,11 @@ static int device_add_class_symlinks(struct device *dev) /* An error here doesn't warrant bringing down the device */ } - if (!dev->class) + sp = class_to_subsys(dev->class); + if (!sp) return 0; - error = sysfs_create_link(&dev->kobj, - &dev->class->p->subsys.kobj, - "subsystem"); + error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_devnode; @@ -3082,46 +3444,38 @@ static int device_add_class_symlinks(struct device *dev) goto out_subsys; } -#ifdef CONFIG_BLOCK - /* /sys/block has directories and does not need symlinks */ - if (sysfs_deprecated && dev->class == &block_class) - return 0; -#endif - /* link in the class directory pointing to the device */ - error = sysfs_create_link(&dev->class->p->subsys.kobj, - &dev->kobj, dev_name(dev)); + error = sysfs_create_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; - - return 0; + goto exit; out_device: sysfs_remove_link(&dev->kobj, "device"); - out_subsys: sysfs_remove_link(&dev->kobj, "subsystem"); out_devnode: sysfs_remove_link(&dev->kobj, "of_node"); +exit: + subsys_put(sp); return error; } static void device_remove_class_symlinks(struct device *dev) { + struct subsys_private *sp = class_to_subsys(dev->class); + if (dev_of_node(dev)) sysfs_remove_link(&dev->kobj, "of_node"); - if (!dev->class) + if (!sp) return; if (dev->parent && device_is_not_partition(dev)) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(&dev->kobj, "subsystem"); -#ifdef CONFIG_BLOCK - if (sysfs_deprecated && dev->class == &block_class) - return; -#endif - sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); + sysfs_delete_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); + subsys_put(sp); } /** @@ -3141,27 +3495,13 @@ int dev_set_name(struct device *dev, const char *fmt, ...) } EXPORT_SYMBOL_GPL(dev_set_name); -/** - * device_to_dev_kobj - select a /sys/dev/ directory for the device - * @dev: device - * - * By default we select char/ for new entries. Setting class->dev_obj - * to NULL prevents an entry from being created. class->dev_kobj must - * be set (or cleared) before any devices are registered to the class - * otherwise device_create_sys_dev_entry() and - * device_remove_sys_dev_entry() will disagree about the presence of - * the link. 
- */ +/* select a /sys/dev/ directory for the device */ static struct kobject *device_to_dev_kobj(struct device *dev) { - struct kobject *kobj; - - if (dev->class) - kobj = dev->class->dev_kobj; + if (is_blockdev(dev)) + return sysfs_dev_block_kobj; else - kobj = sysfs_dev_char_kobj; - - return kobj; + return sysfs_dev_char_kobj; } static int device_create_sys_dev_entry(struct device *dev) @@ -3230,6 +3570,7 @@ static int device_private_init(struct device *dev) */ int device_add(struct device *dev) { + struct subsys_private *sp; struct device *parent; struct kobject *kobj; struct class_interface *class_intf; @@ -3252,18 +3593,19 @@ int device_add(struct device *dev) * the name, and force the use of dev_name() */ if (dev->init_name) { - dev_set_name(dev, "%s", dev->init_name); + error = dev_set_name(dev, "%s", dev->init_name); dev->init_name = NULL; } + if (dev_name(dev)) + error = 0; /* subsystems can specify simple device enumeration */ - if (!dev_name(dev) && dev->bus && dev->bus->dev_name) - dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); - - if (!dev_name(dev)) { + else if (dev->bus && dev->bus->dev_name) + error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); + else error = -EINVAL; + if (error) goto name_error; - } pr_debug("device: '%s': %s\n", dev_name(dev), __func__); @@ -3284,14 +3626,12 @@ int device_add(struct device *dev) /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); if (error) { - glue_dir = get_glue_dir(dev); + glue_dir = kobj; goto Error; } /* notify platform of device entry */ - error = device_platform_notify(dev, KOBJ_ADD); - if (error) - goto platform_error; + device_platform_notify(dev); error = device_create_file(dev, &dev_attr_uevent); if (error) @@ -3326,10 +3666,7 @@ int device_add(struct device *dev) /* Notify clients of device addition. This call must come * after dpm_sysfs_add() and before kobject_uevent(). 
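An illustrative aside on the dev_set_name() handling above: device_add() now propagates a dev_set_name() failure instead of continuing with an unnamed device. A hypothetical caller-side sketch of the usual initialize/name/add sequence and its error path (all foo_* names are made up):

#include <linux/device.h>
#include <linux/slab.h>

static void foo_release(struct device *dev)
{
        kfree(dev);
}

static int foo_register(struct device *parent, int id)
{
        struct device *dev;
        int err;

        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        if (!dev)
                return -ENOMEM;

        device_initialize(dev);
        dev->parent = parent;
        dev->release = foo_release;

        err = dev_set_name(dev, "foo%d", id);
        if (err)
                goto out_put;

        /* Fails cleanly if no usable name could be set. */
        err = device_add(dev);
        if (err)
                goto out_put;

        return 0;

out_put:
        put_device(dev);        /* frees dev via foo_release() */
        return err;
}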
*/ - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_ADD_DEVICE, dev); - + bus_notify(dev, BUS_NOTIFY_ADD_DEVICE); kobject_uevent(&dev->kobj, KOBJ_ADD); /* @@ -3363,18 +3700,18 @@ int device_add(struct device *dev) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); - if (dev->class) { - mutex_lock(&dev->class->p->mutex); + sp = class_to_subsys(dev->class); + if (sp) { + mutex_lock(&sp->mutex); /* tie the class to the device */ - klist_add_tail(&dev->p->knode_class, - &dev->class->p->klist_devices); + klist_add_tail(&dev->p->knode_class, &sp->klist_devices); /* notify any interfaces that the device is here */ - list_for_each_entry(class_intf, - &dev->class->p->interfaces, node) + list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->add_dev) - class_intf->add_dev(dev, class_intf); - mutex_unlock(&dev->class->p->mutex); + class_intf->add_dev(dev); + mutex_unlock(&sp->mutex); + subsys_put(sp); } done: put_device(dev); @@ -3386,6 +3723,7 @@ done: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); @@ -3394,8 +3732,7 @@ done: SymlinkError: device_remove_file(dev, &dev_attr_uevent); attrError: - device_platform_notify(dev, KOBJ_REMOVE); -platform_error: + device_platform_notify_remove(dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); @@ -3494,6 +3831,7 @@ EXPORT_SYMBOL_GPL(kill_device); */ void device_del(struct device *dev) { + struct subsys_private *sp; struct device *parent = dev->parent; struct kobject *glue_dir = NULL; struct class_interface *class_intf; @@ -3510,9 +3848,7 @@ void device_del(struct device *dev) * before dpm_sysfs_remove(). */ noio_flag = memalloc_noio_save(); - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_DEL_DEVICE, dev); + bus_notify(dev, BUS_NOTIFY_DEL_DEVICE); dpm_sysfs_remove(dev); if (parent) @@ -3522,31 +3858,41 @@ void device_del(struct device *dev) device_remove_sys_dev_entry(dev); device_remove_file(dev, &dev_attr_dev); } - if (dev->class) { + + sp = class_to_subsys(dev->class); + if (sp) { device_remove_class_symlinks(dev); - mutex_lock(&dev->class->p->mutex); + mutex_lock(&sp->mutex); /* notify any interfaces that the device is now gone */ - list_for_each_entry(class_intf, - &dev->class->p->interfaces, node) + list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->remove_dev) - class_intf->remove_dev(dev, class_intf); + class_intf->remove_dev(dev); /* remove the device from the class list */ klist_del(&dev->p->knode_class); - mutex_unlock(&dev->class->p->mutex); + mutex_unlock(&sp->mutex); + subsys_put(sp); } device_remove_file(dev, &dev_attr_uevent); device_remove_attrs(dev); bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); - device_platform_notify(dev, KOBJ_REMOVE); - device_remove_properties(dev); + device_platform_notify_remove(dev); device_links_purge(dev); - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_REMOVED_DEVICE, dev); + /* + * If a device does not have a driver attached, we need to clean + * up any managed resources. We do this in device_release(), but + * it's never called (and we leak the device) if a managed + * resource holds a reference to the device. So release all + * managed resources here, like we do in driver_detach(). 
We + * still need to do so again in device_release() in case someone + * adds a new resource after this point, though. + */ + devres_release_all(dev); + + bus_notify(dev, BUS_NOTIFY_REMOVED_DEVICE); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); @@ -3614,7 +3960,7 @@ static struct device *next_device(struct klist_iter *i) * a name. This memory is returned in tmp and needs to be * freed by the caller. */ -const char *device_get_devnode(struct device *dev, +const char *device_get_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp) { @@ -3639,18 +3985,17 @@ const char *device_get_devnode(struct device *dev, return dev_name(dev); /* replace '!' in the name with '/' */ - s = kstrdup(dev_name(dev), GFP_KERNEL); + s = kstrdup_and_replace(dev_name(dev), '!', '/', GFP_KERNEL); if (!s) return NULL; - strreplace(s, '!', '/'); return *tmp = s; } /** * device_for_each_child - device child iterator. * @parent: parent struct device. - * @fn: function to be called for each device. * @data: data for the callback. + * @fn: function to be called for each device. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. @@ -3659,13 +4004,13 @@ const char *device_get_devnode(struct device *dev, * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); @@ -3679,8 +4024,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. - * @fn: function to be called for each device. * @data: data for the callback. + * @fn: function to be called for each device. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. @@ -3689,13 +4034,13 @@ EXPORT_SYMBOL_GPL(device_for_each_child); * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, - int (*fn)(struct device *dev, void *data)) + device_iter_t fn) { struct klist_iter i; struct device *child; int error = 0; - if (!parent->p) + if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); @@ -3707,67 +4052,77 @@ int device_for_each_child_reverse(struct device *parent, void *data, EXPORT_SYMBOL_GPL(device_for_each_child_reverse); /** - * device_find_child - device iterator for locating a particular device. - * @parent: parent struct device - * @match: Callback function to check device - * @data: Data to pass to match function - * - * This is similar to the device_for_each_child() function above, but it - * returns a reference to a device that is 'found' for later use, as - * determined by the @match callback. + * device_for_each_child_reverse_from - device child iterator in reversed order. + * @parent: parent struct device. + * @from: optional starting point in child list + * @data: data for the callback. + * @fn: function to be called for each device. * - * The callback should return 0 if the device doesn't match and non-zero - * if it does. If the callback returns non-zero and a reference to the - * current device can be obtained, this function will return to the caller - * and not iterate over any more devices. 
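Aside, purely illustrative: with the device_iter_t callback type used above, child iterators take (struct device *, void *) and a non-zero return value stops the walk. A hypothetical sketch that counts the children of a parent device:

#include <linux/device.h>

static int foo_count_one(struct device *dev, void *data)
{
        unsigned int *count = data;

        (*count)++;
        return 0;       /* a non-zero return would stop the iteration */
}

static unsigned int foo_count_children(struct device *parent)
{
        unsigned int count = 0;

        /* Also safe for a NULL or never-added parent after this change. */
        device_for_each_child(parent, &count, foo_count_one);
        return count;
}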
+ * Iterate over @parent's child devices, starting at @from, and call @fn + * for each, passing it @data. This helper is identical to + * device_for_each_child_reverse() when @from is NULL. * - * NOTE: you will need to drop the reference with put_device() after use. + * @fn is checked each iteration. If it returns anything other than 0, + * iteration stop and that value is returned to the caller of + * device_for_each_child_reverse_from(); */ -struct device *device_find_child(struct device *parent, void *data, - int (*match)(struct device *dev, void *data)) +int device_for_each_child_reverse_from(struct device *parent, + struct device *from, void *data, + device_iter_t fn) { struct klist_iter i; struct device *child; + int error = 0; - if (!parent) - return NULL; + if (!parent || !parent->p) + return 0; - klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (match(child, data) && get_device(child)) - break; + klist_iter_init_node(&parent->p->klist_children, &i, + (from ? &from->p->knode_parent : NULL)); + while ((child = prev_device(&i)) && !error) + error = fn(child, data); klist_iter_exit(&i); - return child; + return error; } -EXPORT_SYMBOL_GPL(device_find_child); +EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); /** - * device_find_child_by_name - device iterator for locating a child device. + * device_find_child - device iterator for locating a particular device. * @parent: parent struct device - * @name: name of the child device + * @data: Data to pass to match function + * @match: Callback function to check device * - * This is similar to the device_find_child() function above, but it - * returns a reference to a device that has the name @name. + * This is similar to the device_for_each_child() function above, but it + * returns a reference to a device that is 'found' for later use, as + * determined by the @match callback. + * + * The callback should return 0 if the device doesn't match and non-zero + * if it does. If the callback returns non-zero and a reference to the + * current device can be obtained, this function will return to the caller + * and not iterate over any more devices. * * NOTE: you will need to drop the reference with put_device() after use. 
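Illustration only: with device_find_child() now taking a device_match_t and const data, the removed device_find_child_by_name() can be expressed with the stock device_match_name() helper. The wrapper below is hypothetical:

#include <linux/device.h>

static struct device *foo_find_child_by_name(struct device *parent,
                                              const char *name)
{
        /* device_match_name() compares against dev_name() via sysfs_streq(). */
        return device_find_child(parent, name, device_match_name);
}

static void foo_use_child(struct device *parent)
{
        struct device *child = foo_find_child_by_name(parent, "foo.1");

        if (!child)
                return;
        /* ... use child ... */
        put_device(child);      /* drop the reference device_find_child() took */
}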
*/ -struct device *device_find_child_by_name(struct device *parent, - const char *name) +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match) { struct klist_iter i; struct device *child; - if (!parent) + if (!parent || !parent->p) return NULL; klist_iter_init(&parent->p->klist_children, &i); - while ((child = next_device(&i))) - if (sysfs_streq(dev_name(child), name) && get_device(child)) + while ((child = next_device(&i))) { + if (match(child, data)) { + get_device(child); break; + } + } klist_iter_exit(&i); return child; } -EXPORT_SYMBOL_GPL(device_find_child_by_name); +EXPORT_SYMBOL_GPL(device_find_child); int __init devices_init(void) { @@ -3783,9 +4138,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", WQ_PERCPU, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: @@ -3979,7 +4339,7 @@ static void device_create_release(struct device *dev) } static __printf(6, 0) struct device * -device_create_groups_vargs(struct class *class, struct device *parent, +device_create_groups_vargs(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) @@ -3987,7 +4347,7 @@ device_create_groups_vargs(struct class *class, struct device *parent, struct device *dev = NULL; int retval = -ENODEV; - if (class == NULL || IS_ERR(class)) + if (IS_ERR_OR_NULL(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -4039,11 +4399,8 @@ error: * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. - * - * Note: the struct class passed to this function must have previously - * been created with a call to class_create(). */ -struct device *device_create(struct class *class, struct device *parent, +struct device *device_create(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { va_list vargs; @@ -4080,11 +4437,8 @@ EXPORT_SYMBOL_GPL(device_create); * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. - * - * Note: the struct class passed to this function must have previously - * been created with a call to class_create(). */ -struct device *device_create_with_groups(struct class *class, +struct device *device_create_with_groups(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, @@ -4109,7 +4463,7 @@ EXPORT_SYMBOL_GPL(device_create_with_groups); * This call unregisters and cleans up a device that was created with a * call to device_create(). */ -void device_destroy(struct class *class, dev_t devt) +void device_destroy(const struct class *class, dev_t devt) { struct device *dev; @@ -4131,9 +4485,12 @@ EXPORT_SYMBOL_GPL(device_destroy); * on the same device to ensure that new_name is valid and * won't conflict with other devices. * - * Note: Don't call this function. Currently, the networking layer calls this - * function, but that will change. The following text from Kay Sievers offers - * some insight: + * Note: given that some subsystems (networking and infiniband) use this + * function, with no immediate plans for this to change, we cannot assume or + * require that this function not be called at all. 
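Stepping back to the device_create()/device_destroy() hunks just above, which now take a const struct class pointer: a hypothetical minimal user (the class is assumed to have been registered elsewhere with class_register()):

#include <linux/device.h>
#include <linux/err.h>

static struct class foo_class = {
        .name = "foo",  /* assumed to be class_register()ed before use */
};

static struct device *foo_dev;

static int foo_create(dev_t devt)
{
        foo_dev = device_create(&foo_class, NULL, devt, NULL, "foo%d", 0);
        return PTR_ERR_OR_ZERO(foo_dev);
}

static void foo_remove(dev_t devt)
{
        device_destroy(&foo_class, devt);
}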
+ * + * However, if you're writing new code, do not call this function. The following + * text from Kay Sievers offers some insight: * * Renaming devices is racy at many levels, symlinks and other stuff are not * replaced atomically, and you get a "move" uevent, but it's not easy to @@ -4147,13 +4504,6 @@ EXPORT_SYMBOL_GPL(device_destroy); * kernel device renaming. Besides that, it's not even implemented now for * other things than (driver-core wise very simple) network devices. * - * We are currently about to change network renaming in udev to completely - * disallow renaming of devices in the same namespace as the kernel uses, - * because we can't solve the problems properly, that arise with swapping names - * of multiple interfaces without races. Means, renaming of eth[0-9]* will only - * be allowed to some other name than eth[0-9]*, for the aforementioned - * reasons. - * * Make up a "real" name in the driver before you register anything, or add * some other attributes for userspace to find the device, or use udev to add * symlinks -- but never rename kernel devices later, it's a complete mess. We @@ -4162,9 +4512,11 @@ EXPORT_SYMBOL_GPL(device_destroy); */ int device_rename(struct device *dev, const char *new_name) { + struct subsys_private *sp = NULL; struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; + bool is_link_renamed = false; dev = get_device(dev); if (!dev) @@ -4179,18 +4531,28 @@ int device_rename(struct device *dev, const char *new_name) } if (dev->class) { - error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj, - kobj, old_device_name, + sp = class_to_subsys(dev->class); + + if (!sp) { + error = -EINVAL; + goto out; + } + + error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); if (error) goto out; + + is_link_renamed = true; } error = kobject_rename(kobj, new_name); - if (error) - goto out; - out: + if (error && is_link_renamed) + sysfs_rename_link_ns(&sp->subsys.kobj, kobj, new_name, + old_device_name, kobject_namespace(kobj)); + subsys_put(sp); + put_device(dev); kfree(old_device_name); @@ -4306,7 +4668,7 @@ static int device_attrs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { struct kobject *kobj = &dev->kobj; - struct class *class = dev->class; + const struct class *class = dev->class; const struct device_type *type = dev->type; int error; @@ -4364,6 +4726,7 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int error; struct kobject *kobj = &dev->kobj; + struct subsys_private *sp; dev = get_device(dev); if (!dev) @@ -4400,21 +4763,19 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) if (error) goto out; -#ifdef CONFIG_BLOCK - if (sysfs_deprecated && dev->class == &block_class) - goto out; -#endif - /* * Change the owner of the symlink located in the class directory of * the device class associated with @dev which points to the actual * directory entry for @dev to @kuid/@kgid. This ensures that the * symlink shows the same permissions as its target. 
*/ - error = sysfs_link_change_owner(&dev->class->p->subsys.kobj, &dev->kobj, - dev_name(dev), kuid, kgid); - if (error) + sp = class_to_subsys(dev->class); + if (!sp) { + error = -EINVAL; goto out; + } + error = sysfs_link_change_owner(&sp->subsys.kobj, &dev->kobj, dev_name(dev), kuid, kgid); + subsys_put(sp); out: put_device(dev); @@ -4513,7 +4874,7 @@ set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) else return; - strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); + strscpy(dev_info->subsystem, subsys); /* * Add device identifier DEVICE=: @@ -4579,8 +4940,8 @@ static void __dev_printk(const char *level, const struct device *dev, printk("%s(NULL device *): %pV", level, vaf); } -void dev_printk(const char *level, const struct device *dev, - const char *fmt, ...) +void _dev_printk(const char *level, const struct device *dev, + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -4594,7 +4955,7 @@ void dev_printk(const char *level, const struct device *dev, va_end(args); } -EXPORT_SYMBOL(dev_printk); +EXPORT_SYMBOL(_dev_printk); #define define_dev_printk_level(func, kern_level) \ void func(const struct device *dev, const char *fmt, ...) \ @@ -4623,6 +4984,49 @@ define_dev_printk_level(_dev_info, KERN_INFO); #endif +static void __dev_probe_failed(const struct device *dev, int err, bool fatal, + const char *fmt, va_list vargsp) +{ + struct va_format vaf; + va_list vargs; + + /* + * On x86_64 and possibly on other architectures, va_list is actually a + * size-1 array containing a structure. As a result, function parameter + * vargsp decays from T[1] to T*, and &vargsp has type T** rather than + * T(*)[1], which is expected by its assignment to vaf.va below. + * + * One standard way to solve this mess is by creating a copy in a local + * variable of type va_list and then using a pointer to that local copy + * instead, which is the approach employed here. + */ + va_copy(vargs, vargsp); + + vaf.fmt = fmt; + vaf.va = &vargs; + + switch (err) { + case -EPROBE_DEFER: + device_set_deferred_probe_reason(dev, &vaf); + dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + break; + + case -ENOMEM: + /* Don't print anything on -ENOMEM, there's already enough output */ + break; + + default: + /* Log fatal final failures as errors, otherwise produce warnings */ + if (fatal) + dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + else + dev_warn(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + break; + } + + va_end(vargs); +} + /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device @@ -4635,7 +5039,7 @@ define_dev_printk_level(_dev_info, KERN_INFO); * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. - * It replaces code sequence:: + * It replaces the following code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); @@ -4647,31 +5051,78 @@ define_dev_printk_level(_dev_info, KERN_INFO); * * return dev_err_probe(dev, err, ...); * - * Returns @err. + * Using this helper in your probe function is totally fine even if @err + * is known to never be -EPROBE_DEFER. + * The benefit compared to a normal dev_err() is the standardized format + * of the error code, which is emitted symbolically (i.e. you get "EAGAIN" + * instead of "-35"), and having the error code returned allows more + * compact error paths. * + * Returns @err. 
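A hypothetical probe function using the helper documented above (driver and resource names are illustrative, not from this diff):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
        struct clk *clk;

        clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(clk))
                /*
                 * -EPROBE_DEFER is logged at debug level and recorded as the
                 * deferral reason; other errors are logged with dev_err(),
                 * symbolically ("error EINVAL: ..."), and returned unchanged.
                 */
                return dev_err_probe(&pdev->dev, PTR_ERR(clk),
                                     "failed to get clock\n");

        return 0;
}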
*/ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { - struct va_format vaf; - va_list args; + va_list vargs; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; + va_start(vargs, fmt); - if (err != -EPROBE_DEFER) { - dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); - } else { - device_set_deferred_probe_reason(dev, &vaf); - dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); - } + /* Use dev_err() for logging when err doesn't equal -EPROBE_DEFER */ + __dev_probe_failed(dev, err, true, fmt, vargs); - va_end(args); + va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_err_probe); +/** + * dev_warn_probe - probe error check and log helper + * @dev: the pointer to the struct device + * @err: error value to test + * @fmt: printf-style format string + * @...: arguments as specified in the format string + * + * This helper implements common pattern present in probe functions for error + * checking: print debug or warning message depending if the error value is + * -EPROBE_DEFER and propagate error upwards. + * In case of -EPROBE_DEFER it sets also defer probe reason, which can be + * checked later by reading devices_deferred debugfs attribute. + * It replaces the following code sequence:: + * + * if (err != -EPROBE_DEFER) + * dev_warn(dev, ...); + * else + * dev_dbg(dev, ...); + * return err; + * + * with:: + * + * return dev_warn_probe(dev, err, ...); + * + * Using this helper in your probe function is totally fine even if @err + * is known to never be -EPROBE_DEFER. + * The benefit compared to a normal dev_warn() is the standardized format + * of the error code, which is emitted symbolically (i.e. you get "EAGAIN" + * instead of "-35"), and having the error code returned allows more + * compact error paths. + * + * Returns @err. + */ +int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...) +{ + va_list vargs; + + va_start(vargs, fmt); + + /* Use dev_warn() for logging when err doesn't equal -EPROBE_DEFER */ + __dev_probe_failed(dev, err, false, fmt, vargs); + + va_end(vargs); + + return err; +} +EXPORT_SYMBOL_GPL(dev_warn_probe); + static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); @@ -4708,9 +5159,13 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; + + /* Skip nullifying fn->secondary if the primary is shared */ + if (parent && fn == parent->fwnode) + return; + /* Set fn->secondary = NULL, so fn remains the primary fwnode */ - if (!(parent && fn == parent->fwnode)) - fn->secondary = NULL; + fn->secondary = NULL; } else { dev->fwnode = NULL; } @@ -4740,6 +5195,67 @@ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** + * device_remove_of_node - Remove an of_node from a device + * @dev: device whose device tree node is being removed + */ +void device_remove_of_node(struct device *dev) +{ + dev = get_device(dev); + if (!dev) + return; + + if (!dev->of_node) + goto end; + + if (dev->fwnode == of_fwnode_handle(dev->of_node)) + dev->fwnode = NULL; + + of_node_put(dev->of_node); + dev->of_node = NULL; + +end: + put_device(dev); +} +EXPORT_SYMBOL_GPL(device_remove_of_node); + +/** + * device_add_of_node - Add an of_node to an existing device + * @dev: device whose device tree node is being added + * @of_node: of_node to add + * + * Return: 0 on success or error code on failure. 
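A hypothetical caller of the new helper documented above, paired with device_remove_of_node() on teardown (node and function names are illustrative):

#include <linux/device.h>
#include <linux/of.h>

static int foo_attach_dt_node(struct device *dev, struct device_node *parent)
{
        struct device_node *np;
        int err;

        np = of_get_child_by_name(parent, "foo");
        if (!np)
                return -ENODEV;

        /* Fails with -EBUSY if dev->of_node is already set. */
        err = device_add_of_node(dev, np);
        of_node_put(np);        /* device_add_of_node() holds its own reference */
        return err;
}

static void foo_detach_dt_node(struct device *dev)
{
        device_remove_of_node(dev);
}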
+ */ +int device_add_of_node(struct device *dev, struct device_node *of_node) +{ + int ret; + + if (!of_node) + return -EINVAL; + + dev = get_device(dev); + if (!dev) + return -EINVAL; + + if (dev->of_node) { + dev_err(dev, "Cannot replace node %pOF with %pOF\n", + dev->of_node, of_node); + ret = -EBUSY; + goto end; + } + + dev->of_node = of_node_get(of_node); + + if (!dev->fwnode) + dev->fwnode = of_fwnode_handle(of_node); + + ret = 0; +end: + put_device(dev); + return ret; +} +EXPORT_SYMBOL_GPL(device_add_of_node); + +/** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused @@ -4762,21 +5278,52 @@ void device_set_node(struct device *dev, struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(device_set_node); +/** + * get_dev_from_fwnode - Obtain a reference count of the struct device the + * struct fwnode_handle is associated with. + * @fwnode: The pointer to the struct fwnode_handle to obtain the struct device + * reference count of. + * + * This function obtains a reference count of the device the device pointer + * embedded in the struct fwnode_handle points to. + * + * Note that the struct device pointer embedded in struct fwnode_handle does + * *not* have a reference count of the struct device itself. + * + * Hence, it is a UAF (and thus a bug) to call this function if the caller can't + * guarantee that the last reference count of the corresponding struct device is + * not dropped concurrently. + * + * This is possible since struct fwnode_handle has its own reference count and + * hence can out-live the struct device it is associated with. + */ +struct device *get_dev_from_fwnode(struct fwnode_handle *fwnode) +{ + return get_device((fwnode)->dev); +} +EXPORT_SYMBOL_GPL(get_dev_from_fwnode); + int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); +int device_match_type(struct device *dev, const void *type) +{ + return dev->type == type; +} +EXPORT_SYMBOL_GPL(device_match_type); + int device_match_of_node(struct device *dev, const void *np) { - return dev->of_node == np; + return np && dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { - return dev_fwnode(dev) == fwnode; + return fwnode && dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); @@ -4788,10 +5335,16 @@ EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { - return ACPI_COMPANION(dev) == adev; + return adev && ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); +int device_match_acpi_handle(struct device *dev, const void *handle) +{ + return handle && ACPI_HANDLE(dev) == handle; +} +EXPORT_SYMBOL(device_match_acpi_handle); + int device_match_any(struct device *dev, const void *unused) { return 1; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 5ef14db97904..c6c57b6f61c6 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -19,13 +19,14 @@ #include <linux/cpufeature.h> #include <linux/tick.h> #include <linux/pm_qos.h> +#include <linux/delay.h> #include <linux/sched/isolation.h> #include "base.h" static DEFINE_PER_CPU(struct device *, cpu_sys_devices); -static int cpu_subsys_match(struct device *dev, struct device_driver *drv) +static int cpu_subsys_match(struct device *dev, const struct device_driver *drv) { /* ACPI style match is the only 
one that may succeed. */ if (acpi_driver_match_device(dev, drv)) @@ -50,12 +51,30 @@ static int cpu_subsys_online(struct device *dev) int cpuid = dev->id; int from_nid, to_nid; int ret; + int retries = 0; from_nid = cpu_to_node(cpuid); if (from_nid == NUMA_NO_NODE) return -ENODEV; +retry: ret = cpu_device_up(dev); + + /* + * If -EBUSY is returned, it is likely that hotplug is temporarily + * disabled when cpu_hotplug_disable() was called. This condition is + * transient. So we retry after waiting for an exponentially + * increasing delay up to a total of at least 620ms as some PCI + * device initialization can take quite a while. + */ + if (ret == -EBUSY) { + retries++; + if (retries > 5) + return ret; + msleep(10 * (1 << retries)); + goto retry; + } + /* * When hot adding memory to memoryless node and enabling a cpu * on the node, node number of the cpu may internally change. @@ -76,6 +95,7 @@ void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->dev.id; + set_cpu_enabled(logical_cpu, false); unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); device_unregister(&cpu->dev); @@ -125,18 +145,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ #endif /* CONFIG_HOTPLUG_CPU */ -struct bus_type cpu_subsys = { - .name = "cpu", - .dev_name = "cpu", - .match = cpu_subsys_match, -#ifdef CONFIG_HOTPLUG_CPU - .online = cpu_subsys_online, - .offline = cpu_subsys_offline, -#endif -}; -EXPORT_SYMBOL_GPL(cpu_subsys); - -#ifdef CONFIG_KEXEC +#ifdef CONFIG_CRASH_DUMP #include <linux/kexec.h> static ssize_t crash_notes_show(struct device *dev, @@ -181,14 +190,14 @@ static const struct attribute_group crash_note_cpu_attr_group = { #endif static const struct attribute_group *common_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_CRASH_DUMP &crash_note_cpu_attr_group, #endif NULL }; static const struct attribute_group *hotplugable_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_CRASH_DUMP &crash_note_cpu_attr_group, #endif NULL @@ -265,6 +274,13 @@ static ssize_t print_cpus_offline(struct device *dev, } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); +static ssize_t print_cpus_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask)); +} +static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL); + static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { @@ -275,7 +291,7 @@ static ssize_t print_cpus_isolated(struct device *dev, return -ENOMEM; cpumask_andnot(isolated, cpu_possible_mask, - housekeeping_cpumask(HK_FLAG_DOMAIN)); + housekeeping_cpumask(HK_TYPE_DOMAIN)); len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated)); free_cpumask_var(isolated); @@ -284,13 +300,40 @@ static ssize_t print_cpus_isolated(struct device *dev, } static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); +static ssize_t housekeeping_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE); + + if (housekeeping_enabled(HK_TYPE_KERNEL_NOISE)) + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(hk_mask)); + return sysfs_emit(buf, "\n"); +} +static DEVICE_ATTR_RO(housekeeping); + #ifdef CONFIG_NO_HZ_FULL -static ssize_t print_cpus_nohz_full(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nohz_full_show(struct device *dev, + struct device_attribute 
*attr, + char *buf) +{ + if (cpumask_available(tick_nohz_full_mask)) + return sysfs_emit(buf, "%*pbl\n", + cpumask_pr_args(tick_nohz_full_mask)); + return sysfs_emit(buf, "\n"); +} +static DEVICE_ATTR_RO(nohz_full); +#endif + +#ifdef CONFIG_CRASH_HOTPLUG +static ssize_t crash_hotplug_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); + return sysfs_emit(buf, "%d\n", crash_check_hotplug_support()); } -static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); +static DEVICE_ATTR_RO(crash_hotplug); #endif static void cpu_device_release(struct device *dev) @@ -299,7 +342,7 @@ static void cpu_device_release(struct device *dev) * This is an empty function to prevent the driver core from spitting a * warning at us. Yes, I know this is directly opposite of what the * documentation for the driver core and kobjects say, and the author - * of this code has already been publically ridiculed for doing + * of this code has already been publicly ridiculed for doing * something as foolish as this. However, at this point in time, it is * the only way to handle the issue of statically allocated cpu * devices. The different architectures will have their cpu device @@ -336,7 +379,7 @@ static ssize_t print_cpu_modalias(struct device *dev, return len; } -static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +static int cpu_uevent(const struct device *dev, struct kobj_uevent_env *env) { char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (buf) { @@ -348,6 +391,20 @@ static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) } #endif +const struct bus_type cpu_subsys = { + .name = "cpu", + .dev_name = "cpu", + .match = cpu_subsys_match, +#ifdef CONFIG_HOTPLUG_CPU + .online = cpu_subsys_online, + .offline = cpu_subsys_offline, +#endif +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + .uevent = cpu_uevent, +#endif +}; +EXPORT_SYMBOL_GPL(cpu_subsys); + /* * register_cpu - Setup a sysfs device for a CPU. 
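An aside on the -EBUSY handling added to cpu_subsys_online() a little earlier in this hunk: the exponential backoff sleeps 20, 40, 80, 160 and 320 ms over five retries, i.e. at least 620 ms in total, matching the comment. The same pattern reduced to a standalone, hypothetical sketch:

#include <linux/delay.h>
#include <linux/errno.h>

static int foo_retry_on_busy(int (*op)(void *arg), void *arg)
{
        int retries = 0;
        int ret;

retry:
        ret = op(arg);
        if (ret == -EBUSY && ++retries <= 5) {
                msleep(10 * (1 << retries));    /* 20, 40, 80, 160, 320 ms */
                goto retry;
        }
        return ret;
}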
* @cpu - cpu->hotpluggable field set to 1 will generate a control file in @@ -368,9 +425,6 @@ int register_cpu(struct cpu *cpu, int num) cpu->dev.offline_disabled = !cpu->hotpluggable; cpu->dev.offline = !cpu_online(num); cpu->dev.of_node = of_get_cpu_node(num, NULL); -#ifdef CONFIG_GENERIC_CPU_AUTOPROBE - cpu->dev.bus->uevent = cpu_uevent; -#endif cpu->dev.groups = common_cpu_attr_groups; if (cpu->hotpluggable) cpu->dev.groups = hotplugable_cpu_attr_groups; @@ -384,11 +438,12 @@ int register_cpu(struct cpu *cpu, int num) register_cpu_under_node(num, cpu_to_node(num)); dev_pm_qos_expose_latency_limit(&cpu->dev, PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); + set_cpu_enabled(num, true); return 0; } -struct device *get_cpu_device(unsigned cpu) +struct device *get_cpu_device(unsigned int cpu) { if (cpu < nr_cpu_ids && cpu_possible(cpu)) return per_cpu(cpu_sys_devices, cpu); @@ -465,10 +520,15 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, + &dev_attr_enabled.attr, &dev_attr_isolated.attr, + &dev_attr_housekeeping.attr, #ifdef CONFIG_NO_HZ_FULL &dev_attr_nohz_full.attr, #endif +#ifdef CONFIG_CRASH_HOTPLUG + &dev_attr_crash_hotplug.attr, +#endif #ifdef CONFIG_GENERIC_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif @@ -484,86 +544,85 @@ static const struct attribute_group *cpu_root_attr_groups[] = { NULL, }; -bool cpu_is_hotpluggable(unsigned cpu) +bool cpu_is_hotpluggable(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); - return dev && container_of(dev, struct cpu, dev)->hotpluggable; + return dev && container_of(dev, struct cpu, dev)->hotpluggable + && tick_nohz_cpu_hotpluggable(cpu); } EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); #ifdef CONFIG_GENERIC_CPU_DEVICES -static DEFINE_PER_CPU(struct cpu, cpu_devices); -#endif +DEFINE_PER_CPU(struct cpu, cpu_devices); -static void __init cpu_dev_register_generic(void) +bool __weak arch_cpu_is_hotpluggable(int cpu) { -#ifdef CONFIG_GENERIC_CPU_DEVICES - int i; - - for_each_possible_cpu(i) { - if (register_cpu(&per_cpu(cpu_devices, i), i)) - panic("Failed to register CPU device"); - } -#endif + return false; } -#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES - -ssize_t __weak cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +int __weak arch_register_cpu(int cpu) { - return sysfs_emit(buf, "Not affected\n"); -} + struct cpu *c = &per_cpu(cpu_devices, cpu); -ssize_t __weak cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "Not affected\n"); -} + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); -ssize_t __weak cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "Not affected\n"); + return register_cpu(c, cpu); } -ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "Not affected\n"); -} - -ssize_t __weak cpu_show_l1tf(struct device *dev, - struct device_attribute *attr, char *buf) +#ifdef CONFIG_HOTPLUG_CPU +void __weak arch_unregister_cpu(int num) { - return sysfs_emit(buf, "Not affected\n"); + unregister_cpu(&per_cpu(cpu_devices, num)); } +#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* CONFIG_GENERIC_CPU_DEVICES */ -ssize_t __weak cpu_show_mds(struct device *dev, - struct device_attribute *attr, char *buf) +static void __init cpu_dev_register_generic(void) { - return sysfs_emit(buf, "Not affected\n"); -} + int i, ret; -ssize_t __weak 
cpu_show_tsx_async_abort(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "Not affected\n"); -} + if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES)) + return; -ssize_t __weak cpu_show_itlb_multihit(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sysfs_emit(buf, "Not affected\n"); + for_each_present_cpu(i) { + ret = arch_register_cpu(i); + if (ret && ret != -EPROBE_DEFER) + pr_warn("register_cpu %d failed (%d)\n", i, ret); + } } -ssize_t __weak cpu_show_srbds(struct device *dev, +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES +static ssize_t cpu_show_not_affected(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "Not affected\n"); } +#define CPU_SHOW_VULN_FALLBACK(func) \ + ssize_t cpu_show_##func(struct device *, \ + struct device_attribute *, char *) \ + __attribute__((weak, alias("cpu_show_not_affected"))) + +CPU_SHOW_VULN_FALLBACK(meltdown); +CPU_SHOW_VULN_FALLBACK(spectre_v1); +CPU_SHOW_VULN_FALLBACK(spectre_v2); +CPU_SHOW_VULN_FALLBACK(spec_store_bypass); +CPU_SHOW_VULN_FALLBACK(l1tf); +CPU_SHOW_VULN_FALLBACK(mds); +CPU_SHOW_VULN_FALLBACK(tsx_async_abort); +CPU_SHOW_VULN_FALLBACK(itlb_multihit); +CPU_SHOW_VULN_FALLBACK(srbds); +CPU_SHOW_VULN_FALLBACK(mmio_stale_data); +CPU_SHOW_VULN_FALLBACK(retbleed); +CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); +CPU_SHOW_VULN_FALLBACK(gds); +CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(ghostwrite); +CPU_SHOW_VULN_FALLBACK(old_microcode); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); +CPU_SHOW_VULN_FALLBACK(vmscape); + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -573,6 +632,16 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); +static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); +static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); +static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -584,6 +653,16 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, + &dev_attr_spec_rstack_overflow.attr, + &dev_attr_gather_data_sampling.attr, + &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, + &dev_attr_old_microcode.attr, + &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, + &dev_attr_vmscape.attr, NULL }; @@ -594,9 +673,13 @@ static const struct attribute_group cpu_root_vulnerabilities_group = { static void __init 
cpu_register_vulnerabilities(void) { - if (sysfs_create_group(&cpu_subsys.dev_root->kobj, - &cpu_root_vulnerabilities_group)) - pr_err("Unable to register CPU vulnerabilities\n"); + struct device *dev = bus_get_dev_root(&cpu_subsys); + + if (dev) { + if (sysfs_create_group(&dev->kobj, &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); + put_device(dev); + } } #else diff --git a/drivers/base/dd.c b/drivers/base/dd.c index daeb9b5763ae..349f31bedfa1 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -25,6 +25,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/async.h> +#include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/pinctrl/devinfo.h> #include <linux/slab.h> @@ -60,6 +61,7 @@ static bool initcalls_done; /* Save the async probe drivers' name from kernel cmdline */ #define ASYNC_DRV_NAMES_MAX_LEN 256 static char async_probe_drv_names[ASYNC_DRV_NAMES_MAX_LEN]; +static bool async_probe_default; /* * In some cases, like suspend to RAM or hibernation, It might be reasonable @@ -152,7 +154,7 @@ void driver_deferred_probe_del(struct device *dev) mutex_unlock(&deferred_probe_mutex); } -static bool driver_deferred_probe_enable = false; +static bool driver_deferred_probe_enable; /** * driver_deferred_probe_trigger() - Kick off re-probing deferred devices * @@ -171,7 +173,7 @@ static bool driver_deferred_probe_enable = false; * changes in the midst of a probe, then deferred processing should be triggered * again. */ -static void driver_deferred_probe_trigger(void) +void driver_deferred_probe_trigger(void) { if (!driver_deferred_probe_enable) return; @@ -191,7 +193,7 @@ static void driver_deferred_probe_trigger(void) * Kick the re-probe thread. It may already be scheduled, but it is * safe to kick it again. */ - queue_work(system_unbound_wq, &deferred_probe_work); + queue_work(system_dfl_wq, &deferred_probe_work); } /** @@ -247,7 +249,7 @@ static int deferred_devs_show(struct seq_file *s, void *data) list_for_each_entry(curr, &deferred_probe_pending_list, deferred_probe) seq_printf(s, "%s\t%s", dev_name(curr->device), - curr->device->p->deferred_probe_reason ?: "\n"); + curr->deferred_probe_reason ?: "\n"); mutex_unlock(&deferred_probe_mutex); @@ -255,9 +257,11 @@ static int deferred_devs_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(deferred_devs); -int driver_deferred_probe_timeout; -EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); -static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue); +#ifdef CONFIG_MODULES +static int driver_deferred_probe_timeout = 10; +#else +static int driver_deferred_probe_timeout; +#endif static int __init deferred_probe_timeout_setup(char *str) { @@ -274,10 +278,10 @@ __setup("deferred_probe_timeout=", deferred_probe_timeout_setup); * @dev: device to check * * Return: - * -ENODEV if initcalls have completed and modules are disabled. - * -ETIMEDOUT if the deferred probe timeout was set and has expired - * and modules are enabled. - * -EPROBE_DEFER in other cases. + * * -ENODEV if initcalls have completed and modules are disabled. + * * -ETIMEDOUT if the deferred probe timeout was set and has expired + * and modules are enabled. + * * -EPROBE_DEFER in other cases. * * Drivers or subsystems can opt-in to calling this function instead of directly * returning -EPROBE_DEFER. 
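Illustration of the opt-in described above, now that the symbol is exported; the helper below is hypothetical:

#include <linux/device.h>
#include <linux/errno.h>

/*
 * Decide what to return when a required resource has not shown up yet:
 * -EPROBE_DEFER while deferring can still help, -ENODEV or -ETIMEDOUT
 * once initcalls are done or the deferred probe timeout has expired.
 */
static int foo_missing_resource(struct device *dev, bool found)
{
        if (found)
                return 0;

        return driver_deferred_probe_check_state(dev);
}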
@@ -296,6 +300,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -EPROBE_DEFER; } +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { @@ -309,12 +314,27 @@ static void deferred_probe_timeout_work_func(struct work_struct *work) mutex_lock(&deferred_probe_mutex); list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) - dev_info(p->device, "deferred probe pending\n"); + dev_warn(p->device, "deferred probe pending: %s", p->deferred_probe_reason ?: "(reason unknown)\n"); mutex_unlock(&deferred_probe_mutex); - wake_up_all(&probe_timeout_waitqueue); + + fw_devlink_probing_done(); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); +void deferred_probe_extend_timeout(void) +{ + /* + * If the work hasn't been queued yet or if the work expired, don't + * start a new one. + */ + if (cancel_delayed_work(&deferred_probe_timeout_work)) { + schedule_delayed_work(&deferred_probe_timeout_work, + driver_deferred_probe_timeout * HZ); + pr_debug("Extended deferred probe timeout by %d secs\n", + driver_deferred_probe_timeout); + } +} + /** * deferred_probe_initcall() - Enable probing of deferred devices * @@ -347,13 +367,17 @@ static int deferred_probe_initcall(void) schedule_delayed_work(&deferred_probe_timeout_work, driver_deferred_probe_timeout * HZ); } + + if (!IS_ENABLED(CONFIG_MODULES)) + fw_devlink_probing_done(); + return 0; } late_initcall(deferred_probe_initcall); static void __exit deferred_probe_exit(void) { - debugfs_remove_recursive(debugfs_lookup("devices_deferred", NULL)); + debugfs_lookup_and_remove("devices_deferred", NULL); } __exitcall(deferred_probe_exit); @@ -370,17 +394,17 @@ bool device_is_bound(struct device *dev) { return dev->p && klist_node_attached(&dev->p->knode_driver); } +EXPORT_SYMBOL_GPL(device_is_bound); static void driver_bound(struct device *dev) { if (device_is_bound(dev)) { - pr_warn("%s: device %s already bound\n", - __func__, kobject_name(&dev->kobj)); + dev_warn(dev, "%s: device already bound\n", __func__); return; } - pr_debug("driver: '%s': %s: bound to device '%s'\n", dev->driver->name, - __func__, dev_name(dev)); + dev_dbg(dev, "driver: '%s': %s: bound to device\n", dev->driver->name, + __func__); klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices); device_links_driver_bound(dev); @@ -394,10 +418,7 @@ static void driver_bound(struct device *dev) driver_deferred_probe_del(dev); driver_deferred_probe_trigger(); - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_BOUND_DRIVER, dev); - + bus_notify(dev, BUS_NOTIFY_BOUND_DRIVER); kobject_uevent(&dev->kobj, KOBJ_BIND); } @@ -416,9 +437,7 @@ static int driver_sysfs_add(struct device *dev) { int ret; - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_BIND_DRIVER, dev); + bus_notify(dev, BUS_NOTIFY_BIND_DRIVER); ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj, kobject_name(&dev->kobj)); @@ -483,9 +502,8 @@ int device_bind_driver(struct device *dev) device_links_force_bind(dev); driver_bound(dev); } - else if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + else + bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND); return ret; } EXPORT_SYMBOL_GPL(device_bind_driver); @@ -493,6 +511,27 @@ EXPORT_SYMBOL_GPL(device_bind_driver); static atomic_t probe_count = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); 
+static ssize_t state_synced_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret = 0; + + if (strcmp("1", buf)) + return -EINVAL; + + device_lock(dev); + if (!dev->state_synced) { + dev->state_synced = true; + dev_sync_state(dev); + } else { + ret = -EINVAL; + } + device_unlock(dev); + + return ret ? ret : count; +} + static ssize_t state_synced_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -504,10 +543,35 @@ static ssize_t state_synced_show(struct device *dev, return sysfs_emit(buf, "%u\n", val); } -static DEVICE_ATTR_RO(state_synced); +static DEVICE_ATTR_RW(state_synced); +static void device_unbind_cleanup(struct device *dev) +{ + devres_release_all(dev); + arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; + device_set_driver(dev, NULL); + dev_set_drvdata(dev, NULL); + dev_pm_domain_detach(dev, dev->power.detach_power_off); + if (dev->pm_domain && dev->pm_domain->dismiss) + dev->pm_domain->dismiss(dev); + pm_runtime_reinit(dev); + dev_pm_set_driver_flags(dev, 0); +} -static int call_driver_probe(struct device *dev, struct device_driver *drv) +static void device_remove(struct device *dev) +{ + device_remove_file(dev, &dev_attr_state_synced); + device_remove_groups(dev, dev->driver->dev_groups); + + if (dev->bus && dev->bus->remove) + dev->bus->remove(dev); + else if (dev->driver->remove) + dev->driver->remove(dev); +} + +static int call_driver_probe(struct device *dev, const struct device_driver *drv) { int ret = 0; @@ -525,24 +589,24 @@ static int call_driver_probe(struct device *dev, struct device_driver *drv) break; case -ENODEV: case -ENXIO: - pr_debug("%s: probe of %s rejects match %d\n", - drv->name, dev_name(dev), ret); + dev_dbg(dev, "probe with driver %s rejects match %d\n", + drv->name, ret); break; default: /* driver matched but the probe failed */ - pr_warn("%s: probe of %s failed with error %d\n", - drv->name, dev_name(dev), ret); + dev_err(dev, "probe with driver %s failed with error %d\n", + drv->name, ret); break; } return ret; } -static int really_probe(struct device *dev, struct device_driver *drv) +static int really_probe(struct device *dev, const struct device_driver *drv) { bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && !drv->suppress_bind_attrs; - int ret; + int ret, link_ret; if (defer_all_probes) { /* @@ -554,12 +618,12 @@ static int really_probe(struct device *dev, struct device_driver *drv) return -EPROBE_DEFER; } - ret = device_links_check_suppliers(dev); - if (ret) - return ret; + link_ret = device_links_check_suppliers(dev); + if (link_ret == -EPROBE_DEFER) + return link_ret; - pr_debug("bus: '%s': %s: probing driver %s with device %s\n", - drv->bus->name, __func__, drv->name, dev_name(dev)); + dev_dbg(dev, "bus: '%s': %s: probing driver %s with device\n", + drv->bus->name, __func__, drv->name); if (!list_empty(&dev->devres_head)) { dev_crit(dev, "Resources present before probing\n"); ret = -EBUSY; @@ -567,7 +631,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) } re_probe: - dev->driver = drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -577,13 +641,13 @@ re_probe: if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) - goto probe_failed; + goto pinctrl_bind_failed; } - if (driver_sysfs_add(dev)) { - pr_err("%s: driver_sysfs_add(%s) failed\n", - __func__, dev_name(dev)); - goto probe_failed; + ret = 
driver_sysfs_add(dev); + if (ret) { + dev_err(dev, "%s: driver_sysfs_add failed\n", __func__); + goto sysfs_failed; } if (dev->pm_domain && dev->pm_domain->activate) { @@ -595,6 +659,15 @@ re_probe: ret = call_driver_probe(dev, drv); if (ret) { /* + * If fw_devlink_best_effort is active (denoted by -EAGAIN), the + * device might actually probe properly once some of its missing + * suppliers have probed. So, treat this as if the driver + * returned -EPROBE_DEFER. + */ + if (link_ret == -EAGAIN) + ret = -EPROBE_DEFER; + + /* * Return probe errors as positive values so that the callers * can distinguish them from other errors. */ @@ -602,35 +675,28 @@ re_probe: goto probe_failed; } - if (device_add_groups(dev, drv->dev_groups)) { + ret = device_add_groups(dev, drv->dev_groups); + if (ret) { dev_err(dev, "device_add_groups() failed\n"); goto dev_groups_failed; } - if (dev_has_sync_state(dev) && - device_create_file(dev, &dev_attr_state_synced)) { - dev_err(dev, "state_synced sysfs add failed\n"); - goto dev_sysfs_state_synced_failed; + if (dev_has_sync_state(dev)) { + ret = device_create_file(dev, &dev_attr_state_synced); + if (ret) { + dev_err(dev, "state_synced sysfs add failed\n"); + goto dev_sysfs_state_synced_failed; + } } if (test_remove) { test_remove = false; - device_remove_file(dev, &dev_attr_state_synced); - device_remove_groups(dev, drv->dev_groups); - - if (dev->bus->remove) - dev->bus->remove(dev); - else if (drv->remove) - drv->remove(dev); - - devres_release_all(dev); + device_remove(dev); driver_sysfs_remove(dev); - dev->driver = NULL; - dev_set_drvdata(dev, NULL); - if (dev->pm_domain && dev->pm_domain->dismiss) - dev->pm_domain->dismiss(dev); - pm_runtime_reinit(dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); + device_unbind_cleanup(dev); goto re_probe; } @@ -641,34 +707,22 @@ re_probe: dev->pm_domain->sync(dev); driver_bound(dev); - pr_debug("bus: '%s': %s: bound device %s to driver %s\n", - drv->bus->name, __func__, dev_name(dev), drv->name); + dev_dbg(dev, "bus: '%s': %s: bound device to driver %s\n", + drv->bus->name, __func__, drv->name); goto done; dev_sysfs_state_synced_failed: - device_remove_groups(dev, drv->dev_groups); dev_groups_failed: - if (dev->bus->remove) - dev->bus->remove(dev); - else if (drv->remove) - drv->remove(dev); + device_remove(dev); probe_failed: - kfree(dev->dma_range_map); - dev->dma_range_map = NULL; - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_DRIVER_NOT_BOUND, dev); + driver_sysfs_remove(dev); +sysfs_failed: + bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); pinctrl_bind_failed: device_links_no_driver(dev); - devres_release_all(dev); - arch_teardown_dma_ops(dev); - driver_sysfs_remove(dev); - dev->driver = NULL; - dev_set_drvdata(dev, NULL); - if (dev->pm_domain && dev->pm_domain->dismiss) - dev->pm_domain->dismiss(dev); - pm_runtime_reinit(dev); - dev_pm_set_driver_flags(dev, 0); + device_unbind_cleanup(dev); done: return ret; } @@ -676,7 +730,7 @@ done: /* * For initcall_debug, show the driver probe time. 
*/ -static int really_probe_debug(struct device *dev, struct device_driver *drv) +static int really_probe_debug(struct device *dev, const struct device_driver *drv) { ktime_t calltime, rettime; int ret; @@ -684,7 +738,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv) calltime = ktime_get(); ret = really_probe(dev, drv); rettime = ktime_get(); - pr_debug("probe of %s returned %d after %lld usecs\n", + /* + * Don't change this to pr_debug() because that requires + * CONFIG_DYNAMIC_DEBUG and we want a simple 'initcall_debug' on the + * kernel commandline to print this all the time at the debug level. + */ + printk(KERN_DEBUG "probe of %s returned %d after %lld usecs\n", dev_name(dev), ret, ktime_us_delta(rettime, calltime)); return ret; } @@ -695,14 +754,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv) * * Should somehow figure out how to use a semaphore, not an atomic variable... */ -int driver_probe_done(void) +bool __init driver_probe_done(void) { int local_probe_count = atomic_read(&probe_count); pr_debug("%s: probe_count = %d\n", __func__, local_probe_count); - if (local_probe_count) - return -EBUSY; - return 0; + return !local_probe_count; } /** @@ -711,9 +768,6 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { - /* wait for probe timeout */ - wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout); - /* wait for the deferred probe workqueue to finish */ flush_work(&deferred_probe_work); @@ -723,7 +777,7 @@ void wait_for_device_probe(void) } EXPORT_SYMBOL_GPL(wait_for_device_probe); -static int __driver_probe_device(struct device_driver *drv, struct device *dev) +static int __driver_probe_device(const struct device_driver *drv, struct device *dev) { int ret = 0; @@ -733,8 +787,8 @@ static int __driver_probe_device(struct device_driver *drv, struct device *dev) return -EBUSY; dev->can_match = true; - pr_debug("bus: '%s': %s: matched device %s with driver %s\n", - drv->bus->name, __func__, dev_name(dev), drv->name); + dev_dbg(dev, "bus: '%s': %s: matched device with driver %s\n", + drv->bus->name, __func__, drv->name); pm_runtime_get_suppliers(dev); if (dev->parent) @@ -768,7 +822,7 @@ static int __driver_probe_device(struct device_driver *drv, struct device *dev) * * If the device has a parent, runtime-resume the parent before driver probing. */ -static int driver_probe_device(struct device_driver *drv, struct device *dev) +static int driver_probe_device(const struct device_driver *drv, struct device *dev) { int trigger_count = atomic_read(&deferred_trigger_count); int ret; @@ -792,7 +846,11 @@ static int driver_probe_device(struct device_driver *drv, struct device *dev) static inline bool cmdline_requested_async_probing(const char *drv_name) { - return parse_option_str(async_probe_drv_names, drv_name); + bool async_drv; + + async_drv = parse_option_str(async_probe_drv_names, drv_name); + + return (async_probe_default != async_drv); } /* The option format is "driver_async_probe=drv_name1,drv_name2,..." 
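With the probe-timeout wait dropped from wait_for_device_probe() above, the helper now just flushes the deferred-probe workqueue and waits for in-flight probes. A hedged sketch of a typical caller (the initcall and its name are illustrative, not part of this diff):

#include <linux/device.h>
#include <linux/init.h>

static int __init foo_late_scan(void)
{
	/* Let initial binding settle before walking the bus below. */
	wait_for_device_probe();

	/* ... scan the bus/class of interest here ... */
	return 0;
}
late_initcall(foo_late_scan);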
*/ @@ -801,12 +859,14 @@ static int __init save_async_options(char *buf) if (strlen(buf) >= ASYNC_DRV_NAMES_MAX_LEN) pr_warn("Too long list of driver names for 'driver_async_probe'!\n"); - strlcpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); - return 0; + strscpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); + async_probe_default = parse_option_str(async_probe_drv_names, "*"); + + return 1; } __setup("driver_async_probe=", save_async_options); -bool driver_allows_async_probing(struct device_driver *drv) +static bool driver_allows_async_probing(const struct device_driver *drv) { switch (drv->probe_type) { case PROBE_PREFER_ASYNCHRONOUS: @@ -830,7 +890,7 @@ struct device_attach_data { struct device *dev; /* - * Indicates whether we are are considering asynchronous probing or + * Indicates whether we are considering asynchronous probing or * not. Only initial binding after device or driver registration * (including deferral processing) may be done asynchronously, the * rest is always synchronous, as we expect it is being done by @@ -874,6 +934,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); + /* + * Device can't match with a driver right now, so don't attempt + * to match or bind with other drivers on the bus. + */ + return ret; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); return ret; @@ -936,6 +1001,7 @@ out_unlock: static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; + bool async = false; device_lock(dev); if (dev->p->dead) { @@ -949,7 +1015,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { @@ -974,7 +1040,7 @@ static int __device_attach(struct device *dev, bool allow_async) */ dev_dbg(dev, "scheduling asynchronous probe\n"); get_device(dev); - async_schedule_dev(__device_attach_async_helper, dev); + async = true; } else { pm_request_idle(dev); } @@ -984,6 +1050,8 @@ static int __device_attach(struct device *dev, bool allow_async) } out_unlock: device_unlock(dev); + if (async) + async_schedule_dev(__device_attach_async_helper, dev); return ret; } @@ -1009,7 +1077,15 @@ EXPORT_SYMBOL_GPL(device_attach); void device_initial_probe(struct device *dev) { - __device_attach(dev, true); + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (!sp) + return; + + if (sp->drivers_autoprobe) + __device_attach(dev, true); + + subsys_put(sp); } /* @@ -1034,7 +1110,7 @@ static void __device_driver_lock(struct device *dev, struct device *parent) * @parent: Parent device. Needed if the bus requires parent lock * * This function will release the required locks for manipulating dev->drv. - * Normally this will just be the the @dev lock, but when called for a + * Normally this will just be the @dev lock, but when called for a * USB interface, @parent lock will be released as well. */ static void __device_driver_unlock(struct device *dev, struct device *parent) @@ -1052,7 +1128,7 @@ static void __device_driver_unlock(struct device *dev, struct device *parent) * Manually attach driver to a device. Will acquire both @dev lock and * @dev->parent lock if needed. Returns 0 on success, -ERR on failure. 
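driver_allows_async_probing() keys off drv->probe_type, so a driver that is safe to probe off the registration path can opt in at build time; drivers that keep the default strategy can instead be selected with driver_async_probe= on the command line, where the new "*" handling above inverts the default for unlisted drivers. A minimal sketch, with the platform driver and its name purely illustrative:

#include <linux/module.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	/* Long-running setup (firmware download, link training, ...) can
	 * run here without serializing the other boot-time probes. */
	return 0;
}

static struct platform_driver foo_driver = {
	.probe = foo_probe,
	.driver = {
		.name = "foo",
		/* Build-time opt-in; PROBE_DEFAULT_STRATEGY drivers rely
		 * on the command line option instead. */
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};
module_platform_driver(foo_driver);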
*/ -int device_driver_attach(struct device_driver *drv, struct device *dev) +int device_driver_attach(const struct device_driver *drv, struct device *dev) { int ret; @@ -1072,11 +1148,12 @@ EXPORT_SYMBOL_GPL(device_driver_attach); static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) { struct device *dev = _dev; - struct device_driver *drv; + const struct device_driver *drv; int ret; __device_driver_lock(dev, dev->parent); drv = dev->p->async_driver; + dev->p->async_driver = NULL; ret = driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); @@ -1087,7 +1164,8 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) static int __driver_attach(struct device *dev, void *data) { - struct device_driver *drv = data; + const struct device_driver *drv = data; + bool async = false; int ret; /* @@ -1108,9 +1186,18 @@ static int __driver_attach(struct device *dev, void *data) dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); + /* + * Driver could not match with device, but may match with + * another device on the bus. + */ + return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); - return ret; + /* + * Driver could not match with device, but may match with + * another device on the bus. + */ + return 0; } /* ret > 0 means positive match */ if (driver_allows_async_probing(drv)) { @@ -1123,12 +1210,14 @@ static int __driver_attach(struct device *dev, void *data) */ dev_dbg(dev, "probing driver %s asynchronously\n", drv->name); device_lock(dev); - if (!dev->driver) { + if (!dev->driver && !dev->p->async_driver) { get_device(dev); dev->p->async_driver = drv; - async_schedule_dev(__driver_attach_async_helper, dev); + async = true; } device_unlock(dev); + if (async) + async_schedule_dev(__driver_attach_async_helper, dev); return 0; } @@ -1148,9 +1237,10 @@ static int __driver_attach(struct device *dev, void *data) * returns 0 and the @dev->driver is set, we've found a * compatible pair. 
*/ -int driver_attach(struct device_driver *drv) +int driver_attach(const struct device_driver *drv) { - return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); + /* The (void *) will be put back to const * in __driver_attach() */ + return bus_for_each_dev(drv->bus, NULL, (void *)drv, __driver_attach); } EXPORT_SYMBOL_GPL(driver_attach); @@ -1185,45 +1275,28 @@ static void __device_release_driver(struct device *dev, struct device *parent) driver_sysfs_remove(dev); - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_UNBIND_DRIVER, - dev); + bus_notify(dev, BUS_NOTIFY_UNBIND_DRIVER); pm_runtime_put_sync(dev); - device_remove_file(dev, &dev_attr_state_synced); - device_remove_groups(dev, drv->dev_groups); + device_remove(dev); - if (dev->bus && dev->bus->remove) - dev->bus->remove(dev); - else if (drv->remove) - drv->remove(dev); + if (dev->bus && dev->bus->dma_cleanup) + dev->bus->dma_cleanup(dev); + device_unbind_cleanup(dev); device_links_driver_cleanup(dev); - devres_release_all(dev); - arch_teardown_dma_ops(dev); - dev->driver = NULL; - dev_set_drvdata(dev, NULL); - if (dev->pm_domain && dev->pm_domain->dismiss) - dev->pm_domain->dismiss(dev); - pm_runtime_reinit(dev); - dev_pm_set_driver_flags(dev, 0); - klist_remove(&dev->p->knode_driver); device_pm_check_callbacks(dev); - if (dev->bus) - blocking_notifier_call_chain(&dev->bus->p->bus_notifier, - BUS_NOTIFY_UNBOUND_DRIVER, - dev); + bus_notify(dev, BUS_NOTIFY_UNBOUND_DRIVER); kobject_uevent(&dev->kobj, KOBJ_UNBIND); } } void device_release_driver_internal(struct device *dev, - struct device_driver *drv, + const struct device_driver *drv, struct device *parent) { __device_driver_lock(dev, parent); @@ -1272,7 +1345,7 @@ void device_driver_detach(struct device *dev) * driver_detach - detach driver from all devices it controls. * @drv: driver. */ -void driver_detach(struct device_driver *drv) +void driver_detach(const struct device_driver *drv) { struct device_private *dev_prv; struct device *dev; diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index f4d794d6bb85..55bdc7f5e59d 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -18,17 +18,51 @@ static struct class devcd_class; /* global disable flag, for security purposes */ static bool devcd_disabled; -/* if data isn't read by userspace after 5 minutes then delete it */ -#define DEVCD_TIMEOUT (HZ * 60 * 5) - struct devcd_entry { struct device devcd_dev; void *data; size_t datalen; + /* + * There are 2 races for which mutex is required. + * + * The first race is between device creation and userspace writing to + * schedule immediately destruction. + * + * This race is handled by arming the timer before device creation, but + * when device creation fails the timer still exists. + * + * To solve this, hold the mutex during device_add(), and set + * init_completed on success before releasing the mutex. + * + * That way the timer will never fire until device_add() is called, + * it will do nothing if init_completed is not set. The timer is also + * cancelled in that case. + * + * The second race involves multiple parallel invocations of devcd_free(), + * add a deleted flag so only 1 can call the destructor. 
+ */ + struct mutex mutex; + bool init_completed, deleted; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); void (*free)(void *data); + /* + * If nothing interferes and device_add() was returns success, + * del_wk will destroy the device after the timer fires. + * + * Multiple userspace processes can interfere in the working of the timer: + * - Writing to the coredump will reschedule the timer to run immediately, + * if still armed. + * + * This is handled by using "if (cancel_delayed_work()) { + * schedule_delayed_work() }", to prevent re-arming after having + * been previously fired. + * - Writing to /sys/class/devcoredump/disabled will destroy the + * coredump synchronously. + * This is handled by using disable_delayed_work_sync(), and then + * checking if deleted flag is set with &devcd->mutex held. + */ struct delayed_work del_wk; struct device *failing_dev; }; @@ -57,18 +91,31 @@ static void devcd_dev_release(struct device *dev) kfree(devcd); } +static void __devcd_del(struct devcd_entry *devcd) +{ + devcd->deleted = true; + device_del(&devcd->devcd_dev); + put_device(&devcd->devcd_dev); +} + static void devcd_del(struct work_struct *wk) { struct devcd_entry *devcd; + bool init_completed; devcd = container_of(wk, struct devcd_entry, del_wk.work); - device_del(&devcd->devcd_dev); - put_device(&devcd->devcd_dev); + /* devcd->mutex serializes against dev_coredumpm_timeout */ + mutex_lock(&devcd->mutex); + init_completed = devcd->init_completed; + mutex_unlock(&devcd->mutex); + + if (init_completed) + __devcd_del(devcd); } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -78,25 +125,26 @@ static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, } static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mod_delayed_work(system_wq, &devcd->del_wk, 0); + /* + * Although it's tempting to use mod_delayed work here, + * that will cause a reschedule if the timer already fired. + */ + if (cancel_delayed_work(&devcd->del_wk)) + schedule_delayed_work(&devcd->del_wk, 0); return count; } -static struct bin_attribute devcd_attr_data = { - .attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, }, - .size = 0, - .read = devcd_data_read, - .write = devcd_data_write, -}; +static const struct bin_attribute devcd_attr_data = + __BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0); -static struct bin_attribute *devcd_dev_bin_attrs[] = { +static const struct bin_attribute *const devcd_dev_bin_attrs[] = { &devcd_attr_data, NULL, }; @@ -112,17 +160,54 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); - flush_delayed_work(&devcd->del_wk); + /* + * To prevent a race with devcd_data_write(), disable work and + * complete manually instead. + * + * We cannot rely on the return value of + * disable_delayed_work_sync() here, because it might be in the + * middle of a cancel_delayed_work + schedule_delayed_work pair. + * + * devcd->mutex here guards against multiple parallel invocations + * of devcd_free(). 
+ */ + disable_delayed_work_sync(&devcd->del_wk); + mutex_lock(&devcd->mutex); + if (!devcd->deleted) + __devcd_del(devcd); + mutex_unlock(&devcd->mutex); return 0; } -static ssize_t disabled_show(struct class *class, struct class_attribute *attr, +static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", devcd_disabled); } -static ssize_t disabled_store(struct class *class, struct class_attribute *attr, +/* + * + * disabled_store() worker() + * class_for_each_device(&devcd_class, + * NULL, NULL, devcd_free) + * ... + * ... + * while ((dev = class_dev_iter_next(&iter)) + * devcd_del() + * device_del() + * put_device() <- last reference + * error = fn(dev, data) devcd_dev_release() + * devcd_free(dev, data) kfree(devcd) + * + * + * In the above diagram, it looks like disabled_store() would be racing with parallelly + * running devcd_del() and result in memory abort after dropping its last reference with + * put_device(). However, this will not happens as fn(dev, data) runs + * with its own reference to device via klist_node so it is not its last reference. + * so, above situation would not occur. + */ + +static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr, const char *buf, size_t count) { long tmp = simple_strtol(buf, NULL, 10); @@ -151,7 +236,6 @@ ATTRIBUTE_GROUPS(devcd_class); static struct class devcd_class = { .name = "devcoredump", - .owner = THIS_MODULE, .dev_release = devcd_dev_release, .dev_groups = devcd_dev_groups, .class_groups = devcd_class_groups, @@ -214,6 +298,8 @@ static void devcd_free_sgtable(void *data) * @offset: start copy from @offset@ bytes from the head of the data * in the given scatterlist * @data_len: the length of the data in the sg_table + * + * Returns: the number of bytes copied */ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, size_t buf_len, void *data, @@ -231,7 +317,31 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, } /** - * dev_coredumpm - create device coredump with read/free methods + * dev_coredump_put - remove device coredump + * @dev: the struct device for the crashed device + * + * dev_coredump_put() removes coredump, if exists, for a given device from + * the file system and free its associated data otherwise, does nothing. + * + * It is useful for modules that do not want to keep coredump + * available after its unload. + */ +void dev_coredump_put(struct device *dev) +{ + struct device *existing; + + existing = class_find_device(&devcd_class, NULL, dev, + devcd_match_failing); + if (existing) { + devcd_free(existing, NULL); + put_device(existing); + } +} +EXPORT_SYMBOL_GPL(dev_coredump_put); + +/** + * dev_coredumpm_timeout - create device coredump with read/free methods with a + * custom timeout. * @dev: the struct device for the crashed device * @owner: the module that contains the read/free functions, use %THIS_MODULE * @data: data cookie for the @read/@free functions @@ -239,17 +349,20 @@ static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, * @gfp: allocation flags * @read: function to read from the given buffer * @free: function to free the given buffer + * @timeout: time in jiffies to remove coredump * * Creates a new device coredump for the given device. If a previous one hasn't * been read yet, the new coredump is discarded. 
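With the parameter list above in view, a hedged usage sketch: a driver hands a snapshot to dev_coredumpm_timeout() with a custom expiry and drops any pending dump in its remove path with the new dev_coredump_put(). The snapshot allocation, the one-minute timeout and all foo_* names are illustrative only.

#include <linux/devcoredump.h>
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

static ssize_t foo_cd_read(char *buffer, loff_t offset, size_t count,
			   void *data, size_t datalen)
{
	return memory_read_from_buffer(buffer, count, &offset, data, datalen);
}

static void foo_cd_free(void *data)
{
	vfree(data);		/* snapshot assumed to come from vmalloc() */
}

static void foo_report_crash(struct device *dev, void *snapshot, size_t len)
{
	/* Keep the dump for one minute instead of the framework default. */
	dev_coredumpm_timeout(dev, THIS_MODULE, snapshot, len, GFP_KERNEL,
			      foo_cd_read, foo_cd_free,
			      msecs_to_jiffies(60 * 1000));
}

static void foo_remove(struct device *dev)
{
	/* Don't leave a stale dump behind once the driver is gone. */
	dev_coredump_put(dev);
}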
The data lifetime is determined * by the device coredump framework and when it is no longer needed the @free * function will be called to free the data. */ -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { static atomic_t devcd_count = ATOMIC_INIT(0); struct devcd_entry *devcd; @@ -278,13 +391,23 @@ void dev_coredumpm(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); + devcd->deleted = false; + mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); dev_set_name(&devcd->devcd_dev, "devcd%d", atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; + dev_set_uevent_suppress(&devcd->devcd_dev, true); + + /* devcd->mutex prevents devcd_del() completing until init finishes */ + mutex_lock(&devcd->mutex); + devcd->init_completed = false; + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, timeout); + if (device_add(&devcd->devcd_dev)) goto put_device; @@ -299,18 +422,28 @@ void dev_coredumpm(struct device *dev, struct module *owner, "devcoredump")) dev_warn(dev, "devcoredump create_link failed\n"); - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); + /* + * Safe to run devcd_del() now that we are done with devcd_dev. + * Alternatively we could have taken a ref on devcd_dev before + * dropping the lock. + */ + devcd->init_completed = true; + mutex_unlock(&devcd->mutex); return; put_device: + mutex_unlock(&devcd->mutex); + cancel_delayed_work_sync(&devcd->del_wk); put_device(&devcd->devcd_dev); + put_module: module_put(owner); free: free(data); } -EXPORT_SYMBOL_GPL(dev_coredumpm); +EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); /** * dev_coredumpsg - create device coredump that uses scatterlist as data diff --git a/drivers/base/devres.c b/drivers/base/devres.c index eaa9a5cd1db9..f54db6d138ab 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -29,10 +29,10 @@ struct devres { * Some archs want to perform DMA into kmalloc caches * and need a guaranteed alignment larger than * the alignment of a 64-bit integer. - * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same - * buffer alignment as if it was allocated by plain kmalloc(). + * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same + * alignment for struct devres when allocated by kmalloc(). */ - u8 __aligned(ARCH_KMALLOC_MINALIGN) data[]; + u8 __aligned(ARCH_DMA_MINALIGN) data[]; }; struct devres_group { @@ -85,7 +85,7 @@ static void group_close_release(struct device *dev, void *res) /* noop */ } -static struct devres_group * node_to_group(struct devres_node *node) +static struct devres_group *node_to_group(struct devres_node *node) { if (node->release == &group_open_release) return container_of(node, struct devres_group, node[0]); @@ -101,11 +101,14 @@ static bool check_dr_size(size_t size, size_t *tot_size) size, tot_size))) return false; + /* Actually allocate the full kmalloc bucket size. 
*/ + *tot_size = kmalloc_size_roundup(*tot_size); + return true; } -static __always_inline struct devres * alloc_dr(dr_release_t release, - size_t size, gfp_t gfp, int nid) +static __always_inline struct devres *alloc_dr(dr_release_t release, + size_t size, gfp_t gfp, int nid) { size_t tot_size; struct devres *dr; @@ -117,7 +120,9 @@ static __always_inline struct devres * alloc_dr(dr_release_t release, if (unlikely(!dr)) return NULL; - memset(dr, 0, offsetof(struct devres, data)); + /* No need to clear memory twice */ + if (!(gfp & __GFP_ZERO)) + memset(dr, 0, offsetof(struct devres, data)); INIT_LIST_HEAD(&dr->node.entry); dr->node.release = release; @@ -278,8 +283,8 @@ static struct devres *find_dr(struct device *dev, dr_release_t release, * RETURNS: * Pointer to found devres, NULL if not found. */ -void * devres_find(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data) +void *devres_find(struct device *dev, dr_release_t release, + dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; @@ -308,8 +313,8 @@ EXPORT_SYMBOL_GPL(devres_find); * RETURNS: * Pointer to found or added devres. */ -void * devres_get(struct device *dev, void *new_res, - dr_match_t match, void *match_data) +void *devres_get(struct device *dev, void *new_res, + dr_match_t match, void *match_data) { struct devres *new_dr = container_of(new_res, struct devres, data); struct devres *dr; @@ -344,8 +349,8 @@ EXPORT_SYMBOL_GPL(devres_get); * RETURNS: * Pointer to removed devres on success, NULL if not found. */ -void * devres_remove(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data) +void *devres_remove(struct device *dev, dr_release_t release, + dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; @@ -544,7 +549,7 @@ int devres_release_all(struct device *dev) * RETURNS: * ID of the new group, NULL on failure. */ -void * devres_open_group(struct device *dev, void *id, gfp_t gfp) +void *devres_open_group(struct device *dev, void *id, gfp_t gfp) { struct devres_group *grp; unsigned long flags; @@ -562,6 +567,7 @@ void * devres_open_group(struct device *dev, void *id, gfp_t gfp) grp->id = grp; if (id) grp->id = id; + grp->color = 0; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &grp->node[0]); @@ -570,8 +576,11 @@ void * devres_open_group(struct device *dev, void *id, gfp_t gfp) } EXPORT_SYMBOL_GPL(devres_open_group); -/* Find devres group with ID @id. If @id is NULL, look for the latest. */ -static struct devres_group * find_group(struct device *dev, void *id) +/* + * Find devres group with ID @id. If @id is NULL, look for the latest open + * group. + */ +static struct devres_group *find_group(struct device *dev, void *id) { struct devres_node *node; @@ -681,6 +690,13 @@ int devres_release_group(struct device *dev, void *id) spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); + } else if (list_empty(&dev->devres_head)) { + /* + * dev is probably dying via devres_release_all(): groups + * have already been removed and are on the process of + * being released - don't touch and don't warn. + */ + spin_unlock_irqrestore(&dev->devres_lock, flags); } else { WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); @@ -692,7 +708,7 @@ EXPORT_SYMBOL_GPL(devres_release_group); /* * Custom devres actions allow inserting a simple function call - * into the teadown sequence. + * into the teardown sequence. 
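Before the custom-action plumbing below, a short sketch of the group API touched in the hunks above: devres_open_group() brackets a set of managed allocations so a mid-probe failure can release just that slice. The helper name and sizes are illustrative, not taken from this diff.

#include <linux/device.h>
#include <linux/slab.h>

static int foo_setup_optional(struct device *dev)
{
	void *group, *buf;

	group = devres_open_group(dev, NULL, GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	buf = devm_kzalloc(dev, 256, GFP_KERNEL);
	if (!buf) {
		/* Releases everything added since the group was opened. */
		devres_release_group(dev, group);
		return -ENOMEM;
	}

	/* Keep the group; it is torn down with the rest on unbind. */
	devres_close_group(dev, group);
	return 0;
}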
*/ struct action_devres { @@ -717,20 +733,21 @@ static void devm_action_release(struct device *dev, void *res) } /** - * devm_add_action() - add a custom action to list of managed resources + * __devm_add_action() - add a custom action to list of managed resources * @dev: Device that owns the action * @action: Function that should be called * @data: Pointer to data passed to @action implementation + * @name: Name of the resource (for debugging purposes) * * This adds a custom action to the list of managed resources so that * it gets executed as part of standard resource unwinding. */ -int devm_add_action(struct device *dev, void (*action)(void *), void *data) +int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name) { struct action_devres *devres; - devres = devres_alloc(devm_action_release, - sizeof(struct action_devres), GFP_KERNEL); + devres = __devres_alloc_node(devm_action_release, sizeof(struct action_devres), + GFP_KERNEL, NUMA_NO_NODE, name); if (!devres) return -ENOMEM; @@ -740,28 +757,52 @@ int devm_add_action(struct device *dev, void (*action)(void *), void *data) devres_add(dev, devres); return 0; } -EXPORT_SYMBOL_GPL(devm_add_action); +EXPORT_SYMBOL_GPL(__devm_add_action); + +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data) +{ + struct action_devres devres = { + .data = data, + .action = action, + }; + + return devres_find(dev, devm_action_release, devm_action_match, &devres); +} +EXPORT_SYMBOL_GPL(devm_is_action_added); /** - * devm_remove_action() - removes previously added custom action + * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. + * + * In contrast to devm_remove_action(), this function does not WARN() if no + * entry could have been found. + * + * This should only be used if the action is contained in an object with + * independent lifetime management, e.g. the Devres rust abstraction. + * + * Causing the warning from regular driver code most likely indicates an abuse + * of the devres API. + * + * Returns: 0 on success, -ENOENT if no entry could have been found. */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +int devm_remove_action_nowarn(struct device *dev, + void (*action)(void *), + void *data) { struct action_devres devres = { .data = data, .action = action, }; - WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, - &devres)); + return devres_destroy(dev, devm_action_release, devm_action_match, + &devres); } -EXPORT_SYMBOL_GPL(devm_remove_action); +EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); /** * devm_release_action() - release previously added custom action @@ -890,9 +931,12 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) /* * Otherwise: allocate new, larger chunk. We need to allocate before * taking the lock as most probably the caller uses GFP_KERNEL. + * alloc_dr() will call check_dr_size() to reserve extra memory + * for struct devres automatically, so size @new_size user request + * is delivered to it directly as devm_kmalloc() does. 
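The action helpers reworked above (devm_add_action() now funnels through __devm_add_action() so the resource name shows up in devres debugging, and devm_is_action_added()/devm_remove_action_nowarn() are new) are normally consumed through the wrappers declared alongside devm_add_action(). A minimal, hedged sketch with an illustrative clock consumer:

#include <linux/clk.h>
#include <linux/device.h>

static void foo_clk_off(void *data)
{
	clk_disable_unprepare(data);
}

static int foo_enable_clk(struct device *dev, struct clk *clk)
{
	int ret = clk_prepare_enable(clk);

	if (ret)
		return ret;

	/* Undo the enable automatically on unbind; if adding the action
	 * fails, it runs immediately and -ENOMEM is returned. */
	return devm_add_action_or_reset(dev, foo_clk_off, clk);
}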
*/ new_dr = alloc_dr(devm_kmalloc_release, - total_new_size, gfp, dev_to_node(dev)); + new_size, gfp, dev_to_node(dev)); if (!new_dr) return NULL; @@ -916,7 +960,7 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) /* * We can copy the memory contents after releasing the lock as we're - * no longer modyfing the list links. + * no longer modifying the list links. */ memcpy(new_dr->data, old_dr->data, total_old_size - offsetof(struct devres, data)); @@ -943,17 +987,10 @@ EXPORT_SYMBOL_GPL(devm_krealloc); */ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) { - size_t size; - char *buf; - if (!s) return NULL; - size = strlen(s) + 1; - buf = devm_kmalloc(dev, size, gfp); - if (buf) - memcpy(buf, s, size); - return buf; + return devm_kmemdup(dev, s, strlen(s) + 1, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup); @@ -1080,6 +1117,27 @@ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) } EXPORT_SYMBOL_GPL(devm_kmemdup); +/** + * devm_kmemdup_const - conditionally duplicate and manage a region of memory + * + * @dev: Device this memory belongs to + * @src: memory region to duplicate + * @len: memory region length, + * @gfp: GFP mask to use + * + * Return: source address if it is in .rodata or the return value of kmemdup() + * to which the function falls back otherwise. + */ +const void * +devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp) +{ + if (is_kernel_rodata((unsigned long)src)) + return src; + + return devm_kmemdup(dev, src, len, gfp); +} +EXPORT_SYMBOL_GPL(devm_kmemdup_const); + struct pages_devres { unsigned long addr; unsigned int order; @@ -1164,13 +1222,6 @@ static void devm_percpu_release(struct device *dev, void *pdata) free_percpu(p); } -static int devm_percpu_match(struct device *dev, void *data, void *p) -{ - struct devres *devr = container_of(data, struct devres, data); - - return *(void **)devr->data == p; -} - /** * __devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for @@ -1206,17 +1257,3 @@ void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, return pcpu; } EXPORT_SYMBOL_GPL(__devm_alloc_percpu); - -/** - * devm_free_percpu - Resource-managed free_percpu - * @dev: Device this memory belongs to - * @pdata: Per-cpu memory to free - * - * Free memory allocated with devm_alloc_percpu(). - */ -void devm_free_percpu(struct device *dev, void __percpu *pdata) -{ - WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match, - (__force void *)pdata)); -} -EXPORT_SYMBOL_GPL(devm_free_percpu); diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 8be352ab4ddb..194b44075ac7 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -13,11 +13,13 @@ * overwrite the default setting if needed. 
*/ +#define pr_fmt(fmt) "devtmpfs: " fmt + #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/device.h> -#include <linux/genhd.h> +#include <linux/blkdev.h> #include <linux/namei.h> #include <linux/fs.h> #include <linux/shmem_fs.h> @@ -29,6 +31,12 @@ #include <uapi/linux/mount.h> #include "base.h" +#ifdef CONFIG_DEVTMPFS_SAFE +#define DEVTMPFS_MFLAGS (MS_SILENT | MS_NOEXEC | MS_NOSUID) +#else +#define DEVTMPFS_MFLAGS (MS_SILENT) +#endif + static struct task_struct *thread; static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT); @@ -55,40 +63,51 @@ __setup("devtmpfs.mount=", mount_param); static struct vfsmount *mnt; -static struct dentry *public_dev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - struct super_block *s = mnt->mnt_sb; - atomic_inc(&s->s_active); - down_write(&s->s_umount); - return dget(s->s_root); -} - static struct file_system_type internal_fs_type = { .name = "devtmpfs", #ifdef CONFIG_TMPFS .init_fs_context = shmem_init_fs_context, - .parameters = shmem_fs_parameters, #else .init_fs_context = ramfs_init_fs_context, - .parameters = ramfs_fs_parameters, #endif - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; -static struct file_system_type dev_fs_type = { - .name = "devtmpfs", - .mount = public_dev_mount, -}; - -#ifdef CONFIG_BLOCK -static inline int is_blockdev(struct device *dev) +/* Simply take a ref on the existing mount */ +static int devtmpfs_get_tree(struct fs_context *fc) { - return dev->class == &block_class; + struct super_block *sb = mnt->mnt_sb; + + atomic_inc(&sb->s_active); + down_write(&sb->s_umount); + fc->root = dget(sb->s_root); + return 0; } + +/* Ops are filled in during init depending on underlying shmem or ramfs type */ +struct fs_context_operations devtmpfs_context_ops = {}; + +/* Call the underlying initialization and set to our ops */ +static int devtmpfs_init_fs_context(struct fs_context *fc) +{ + int ret; +#ifdef CONFIG_TMPFS + ret = shmem_init_fs_context(fc); #else -static inline int is_blockdev(struct device *dev) { return 0; } + ret = ramfs_init_fs_context(fc); #endif + if (ret < 0) + return ret; + + fc->ops = &devtmpfs_context_ops; + + return 0; +} + +static struct file_system_type dev_fs_type = { + .name = "devtmpfs", + .init_fs_context = devtmpfs_init_fs_context, +}; static int devtmpfs_submit_req(struct req *req, const char *tmp) { @@ -156,18 +175,17 @@ static int dev_mkdir(const char *name, umode_t mode) { struct dentry *dentry; struct path path; - int err; - dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + dentry = start_creating_path(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); - err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode); - if (!err) + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL); + if (!IS_ERR(dentry)) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; - done_path_create(&path, dentry); - return err; + end_creating_path(&path, dentry); + return PTR_ERR_OR_ZERO(dentry); } static int create_path(const char *nodepath) @@ -204,16 +222,16 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, struct path path; int err; - dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); + dentry = start_creating_path(AT_FDCWD, nodename, &path, 0); if (dentry == ERR_PTR(-ENOENT)) { create_path(nodename); - dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); + dentry = 
start_creating_path(AT_FDCWD, nodename, &path, 0); } if (IS_ERR(dentry)) return PTR_ERR(dentry); - err = vfs_mknod(&init_user_ns, d_inode(path.dentry), dentry, mode, - dev->devt); + err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, + dev->devt, NULL); if (!err) { struct iattr newattrs; @@ -222,13 +240,13 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, newattrs.ia_gid = gid; newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID; inode_lock(d_inode(dentry)); - notify_change(&init_user_ns, dentry, &newattrs, NULL); + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); inode_unlock(d_inode(dentry)); /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; } - done_path_create(&path, dentry); + end_creating_path(&path, dentry); return err; } @@ -238,21 +256,16 @@ static int dev_rmdir(const char *name) struct dentry *dentry; int err; - dentry = kern_path_locked(name, &parent); + dentry = start_removing_path(name, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - if (d_inode(dentry)->i_private == &thread) - err = vfs_rmdir(&init_user_ns, d_inode(parent.dentry), - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } - dput(dentry); - inode_unlock(d_inode(parent.dentry)); - path_put(&parent); + if (d_inode(dentry)->i_private == &thread) + err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + else + err = -EPERM; + + end_removing_path(&parent, dentry); return err; } @@ -281,7 +294,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -289,13 +302,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -306,44 +319,36 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; + struct inode *inode; int deleted = 0; - int err; + int err = 0; - dentry = kern_path_locked(nodename, &parent); + dentry = start_removing_path(nodename, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - if (d_really_is_positive(dentry)) { - struct kstat stat; - struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = GLOBAL_ROOT_UID; - newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - inode_lock(d_inode(dentry)); - notify_change(&init_user_ns, dentry, &newattrs, NULL); - inode_unlock(d_inode(dentry)); - err = vfs_unlink(&init_user_ns, d_inode(parent.dentry), - dentry, NULL); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + 
newattrs.ia_uid = GLOBAL_ROOT_UID; + newattrs.ia_gid = GLOBAL_ROOT_GID; + newattrs.ia_mode = inode->i_mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + inode_lock(d_inode(dentry)); + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); + inode_unlock(d_inode(dentry)); + err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), + dentry, NULL); + if (!err || err == -ENOENT) + deleted = 1; } - dput(dentry); - inode_unlock(d_inode(parent.dentry)); + end_removing_path(&parent, dentry); - path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; @@ -363,11 +368,11 @@ int __init devtmpfs_mount(void) if (!thread) return 0; - err = init_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "dev", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) - printk(KERN_INFO "devtmpfs: error mounting %i\n", err); + pr_info("error mounting %d\n", err); else - printk(KERN_INFO "devtmpfs: mounted\n"); + pr_info("mounted\n"); return err; } @@ -412,7 +417,7 @@ static noinline int __init devtmpfs_setup(void *p) err = ksys_unshare(CLONE_NEWNS); if (err) goto out; - err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "/", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) goto out; init_chdir("/.."); /* will traverse into overmounted root */ @@ -439,6 +444,31 @@ static int __ref devtmpfsd(void *p) } /* + * Get the underlying (shmem/ramfs) context ops to build ours + */ +static int devtmpfs_configure_context(void) +{ + struct fs_context *fc; + + fc = fs_context_for_reconfigure(mnt->mnt_root, mnt->mnt_sb->s_flags, + MS_RMT_MASK); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + /* Set up devtmpfs_context_ops based on underlying type */ + devtmpfs_context_ops.free = fc->ops->free; + devtmpfs_context_ops.dup = fc->ops->dup; + devtmpfs_context_ops.parse_param = fc->ops->parse_param; + devtmpfs_context_ops.parse_monolithic = fc->ops->parse_monolithic; + devtmpfs_context_ops.get_tree = &devtmpfs_get_tree; + devtmpfs_context_ops.reconfigure = fc->ops->reconfigure; + + put_fs_context(fc); + + return 0; +} + +/* * Create devtmpfs instance, driver-core devices will add their device * nodes here. */ @@ -449,14 +479,19 @@ int __init devtmpfs_init(void) mnt = vfs_kern_mount(&internal_fs_type, 0, "devtmpfs", opts); if (IS_ERR(mnt)) { - printk(KERN_ERR "devtmpfs: unable to create devtmpfs %ld\n", - PTR_ERR(mnt)); + pr_err("unable to create devtmpfs %ld\n", PTR_ERR(mnt)); return PTR_ERR(mnt); } + + err = devtmpfs_configure_context(); + if (err) { + pr_err("unable to configure devtmpfs type %d\n", err); + return err; + } + err = register_filesystem(&dev_fs_type); if (err) { - printk(KERN_ERR "devtmpfs: unable to register devtmpfs " - "type %i\n", err); + pr_err("unable to register devtmpfs type %d\n", err); return err; } @@ -469,11 +504,12 @@ int __init devtmpfs_init(void) } if (err) { - printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err); + pr_err("unable to create devtmpfs %d\n", err); unregister_filesystem(&dev_fs_type); + thread = NULL; return err; } - printk(KERN_INFO "devtmpfs: initialized\n"); + pr_info("initialized\n"); return 0; } diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 8c0d33e182fd..8ab010ddf709 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -31,6 +31,81 @@ static struct device *next_device(struct klist_iter *i) } /** + * driver_set_override() - Helper to set or clear driver override. 
+ * @dev: Device to change + * @override: Address of string to change (e.g. &device->driver_override); + * The contents will be freed and hold newly allocated override. + * @s: NUL-terminated string, new driver name to force a match, pass empty + * string to clear it ("" or "\n", where the latter is only for sysfs + * interface). + * @len: length of @s + * + * Helper to set or clear driver override in a device, intended for the cases + * when the driver_override field is allocated by driver/bus code. + * + * Returns: 0 on success or a negative error code on failure. + */ +int driver_set_override(struct device *dev, const char **override, + const char *s, size_t len) +{ + const char *new, *old; + char *cp; + + if (!override || !s) + return -EINVAL; + + /* + * The stored value will be used in sysfs show callback (sysfs_emit()), + * which has a length limit of PAGE_SIZE and adds a trailing newline. + * Thus we can store one character less to avoid truncation during sysfs + * show. + */ + if (len >= (PAGE_SIZE - 1)) + return -EINVAL; + + /* + * Compute the real length of the string in case userspace sends us a + * bunch of \0 characters like python likes to do. + */ + len = strlen(s); + + if (!len) { + /* Empty string passed - clear override */ + device_lock(dev); + old = *override; + *override = NULL; + device_unlock(dev); + kfree(old); + + return 0; + } + + cp = strnchr(s, len, '\n'); + if (cp) + len = cp - s; + + new = kstrndup(s, len, GFP_KERNEL); + if (!new) + return -ENOMEM; + + device_lock(dev); + old = *override; + if (cp != s) { + *override = new; + } else { + /* "\n" passed - clear override */ + kfree(new); + *override = NULL; + } + device_unlock(dev); + + kfree(old); + + return 0; +} +EXPORT_SYMBOL_GPL(driver_set_override); + +/** * driver_for_each_device - Iterator for devices bound to a driver. * @drv: Driver we're iterating. * @start: Device to begin with @@ -40,7 +115,7 @@ static struct device *next_device(struct klist_iter *i) * Iterate over the @drv's list of devices calling @fn for each one. */ int driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *, void *)) + void *data, device_iter_t fn) { struct klist_iter i; struct device *dev; @@ -73,9 +148,9 @@ EXPORT_SYMBOL_GPL(driver_for_each_device); * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ -struct device *driver_find_device(struct device_driver *drv, +struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)) + device_match_t match) { struct klist_iter i; struct device *dev; @@ -85,9 +160,12 @@ struct device *driver_find_device(struct device_driver *drv, klist_iter_init_node(&drv->p->klist_devices, &i, (start ? &start->p->knode_driver : NULL)); - while ((dev = next_device(&i))) - if (match(dev, data) && get_device(dev)) + while ((dev = next_device(&i))) { + if (match(dev, data)) { + get_device(dev); break; + } + } klist_iter_exit(&i); return dev; } @@ -98,7 +176,7 @@ EXPORT_SYMBOL_GPL(driver_find_device); * @drv: driver. * @attr: driver attribute descriptor. */ -int driver_create_file(struct device_driver *drv, +int driver_create_file(const struct device_driver *drv, const struct driver_attribute *attr) { int error; @@ -116,7 +194,7 @@ EXPORT_SYMBOL_GPL(driver_create_file); * @drv: driver. * @attr: driver attribute descriptor. 
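driver_set_override() above centralizes the driver_override handling that several buses previously open-coded. A sketch of how a bus's sysfs store callback might wire it up, modeled loosely on the platform bus (the attribute plumbing here is illustrative, not part of this diff):

#include <linux/device.h>
#include <linux/platform_device.h>

static ssize_t driver_override_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	struct platform_device *pdev = to_platform_device(dev);
	int ret;

	/* Takes the device lock, frees any previous override and stores
	 * the new one; an empty string or "\n" clears it. */
	ret = driver_set_override(dev, &pdev->driver_override, buf, count);
	if (ret)
		return ret;

	return count;
}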
*/ -void driver_remove_file(struct device_driver *drv, +void driver_remove_file(const struct device_driver *drv, const struct driver_attribute *attr) { if (drv) @@ -124,13 +202,13 @@ void driver_remove_file(struct device_driver *drv, } EXPORT_SYMBOL_GPL(driver_remove_file); -int driver_add_groups(struct device_driver *drv, +int driver_add_groups(const struct device_driver *drv, const struct attribute_group **groups) { return sysfs_create_groups(&drv->p->kobj, groups); } -void driver_remove_groups(struct device_driver *drv, +void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups) { sysfs_remove_groups(&drv->p->kobj, groups); @@ -149,7 +227,7 @@ int driver_register(struct device_driver *drv) int ret; struct device_driver *other; - if (!drv->bus->p) { + if (!bus_is_registered(drv->bus)) { pr_err("Driver '%s' was unable to register with bus_type '%s' because the bus was not initialized.\n", drv->name, drv->bus->name); return -EINVAL; @@ -177,6 +255,7 @@ int driver_register(struct device_driver *drv) return ret; } kobject_uevent(&drv->p->kobj, KOBJ_ADD); + deferred_probe_extend_timeout(); return ret; } @@ -198,30 +277,3 @@ void driver_unregister(struct device_driver *drv) bus_remove_driver(drv); } EXPORT_SYMBOL_GPL(driver_unregister); - -/** - * driver_find - locate driver on a bus by its name. - * @name: name of the driver. - * @bus: bus to scan for the driver. - * - * Call kset_find_obj() to iterate over list of drivers on - * a bus to find driver by name. Return driver if found. - * - * This routine provides no locking to prevent the driver it returns - * from being unregistered or unloaded while the caller is using it. - * The caller is responsible for preventing this. - */ -struct device_driver *driver_find(const char *name, struct bus_type *bus) -{ - struct kobject *k = kset_find_obj(bus->p->drivers_kset, name); - struct driver_private *priv; - - if (k) { - /* Drop reference added by kset_find_obj() */ - kobject_put(k); - priv = to_driver(k); - return priv->driver; - } - return NULL; -} -EXPORT_SYMBOL_GPL(driver_find); diff --git a/drivers/base/faux.c b/drivers/base/faux.c new file mode 100644 index 000000000000..21dd02124231 --- /dev/null +++ b/drivers/base/faux.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/container_of.h> +#include <linux/device/faux.h> +#include "base.h" + +/* + * Internal wrapper structure so we can hold a pointer to the + * faux_device_ops for this device. 
+ */ +struct faux_object { + struct faux_device faux_dev; + const struct faux_device_ops *faux_ops; + const struct attribute_group **groups; +}; +#define to_faux_object(dev) container_of_const(dev, struct faux_object, faux_dev.dev) + +static struct device faux_bus_root = { + .init_name = "faux", +}; + +static int faux_match(struct device *dev, const struct device_driver *drv) +{ + /* Match always succeeds, we only have one driver */ + return 1; +} + +static int faux_probe(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + int ret; + + if (faux_ops && faux_ops->probe) { + ret = faux_ops->probe(faux_dev); + if (ret) + return ret; + } + + /* + * Add groups after the probe succeeds to ensure resources are + * initialized correctly + */ + ret = device_add_groups(dev, faux_obj->groups); + if (ret && faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); + + return ret; +} + +static void faux_remove(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + + device_remove_groups(dev, faux_obj->groups); + + if (faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); +} + +static const struct bus_type faux_bus_type = { + .name = "faux", + .match = faux_match, + .probe = faux_probe, + .remove = faux_remove, +}; + +static struct device_driver faux_driver = { + .name = "faux_driver", + .bus = &faux_bus_type, + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, +}; + +static void faux_device_release(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + + kfree(faux_obj); +} + +/** + * faux_device_create_with_groups - Create and register with the driver + * core a faux device and populate the device with an initial + * set of sysfs attributes. + * @name: The name of the device we are adding, must be unique for + * all faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * @groups: The set of sysfs attributes that will be created for this + * device when it is registered with the driver core. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. If the + * probe callback (if one is present) does NOT succeed, the creation of the + * device will fail and NULL will be returned. 
+ * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups) +{ + struct faux_object *faux_obj; + struct faux_device *faux_dev; + struct device *dev; + int ret; + + faux_obj = kzalloc(sizeof(*faux_obj), GFP_KERNEL); + if (!faux_obj) + return NULL; + + /* Save off the callbacks and groups so we can use them in the future */ + faux_obj->faux_ops = faux_ops; + faux_obj->groups = groups; + + /* Initialize the device portion and register it with the driver core */ + faux_dev = &faux_obj->faux_dev; + dev = &faux_dev->dev; + + device_initialize(dev); + dev->release = faux_device_release; + if (parent) + dev->parent = parent; + else + dev->parent = &faux_bus_root; + dev->bus = &faux_bus_type; + dev_set_name(dev, "%s", name); + device_set_pm_not_required(dev); + + ret = device_add(dev); + if (ret) { + pr_err("%s: device_add for faux device '%s' failed with %d\n", + __func__, name, ret); + put_device(dev); + return NULL; + } + + /* + * Verify that we did bind the driver to the device (i.e. probe worked), + * if not, let's fail the creation as trying to guess if probe was + * successful is almost impossible to determine by the caller. + */ + if (!dev->driver) { + dev_dbg(dev, "probe did not succeed, tearing down the device\n"); + faux_device_destroy(faux_dev); + faux_dev = NULL; + } + + return faux_dev; +} +EXPORT_SYMBOL_GPL(faux_device_create_with_groups); + +/** + * faux_device_create - create and register with the driver core a faux device + * @name: The name of the device we are adding, must be unique for all + * faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops) +{ + return faux_device_create_with_groups(name, parent, faux_ops, NULL); +} +EXPORT_SYMBOL_GPL(faux_device_create); + +/** + * faux_device_destroy - destroy a faux device + * @faux_dev: faux device to destroy + * + * Unregisters and cleans up a device that was created with a call to + * faux_device_create() + */ +void faux_device_destroy(struct faux_device *faux_dev) +{ + struct device *dev = &faux_dev->dev; + + if (!faux_dev) + return; + + device_del(dev); + + /* The final put_device() will clean up the memory we allocated for this device. 
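As a usage sketch for the new faux bus: a subsystem that only needs a device to hang sysfs state or devres off can create one at init time and tear it down on exit. The foo_* names are illustrative; the ops pointer may also be NULL when no probe/remove work is needed.

#include <linux/device/faux.h>
#include <linux/module.h>

static int foo_faux_probe(struct faux_device *fdev)
{
	/* Allocate state, add attributes, etc. A non-zero return makes
	 * faux_device_create() fail and return NULL. */
	return 0;
}

static void foo_faux_remove(struct faux_device *fdev)
{
	/* Undo whatever probe set up. */
}

static const struct faux_device_ops foo_faux_ops = {
	.probe = foo_faux_probe,
	.remove = foo_faux_remove,
};

static struct faux_device *foo_fdev;

static int __init foo_init(void)
{
	foo_fdev = faux_device_create("foo", NULL, &foo_faux_ops);
	return foo_fdev ? 0 : -ENODEV;
}
module_init(foo_init);

static void __exit foo_exit(void)
{
	faux_device_destroy(foo_fdev);
}
module_exit(foo_exit);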
*/ + put_device(dev); +} +EXPORT_SYMBOL_GPL(faux_device_destroy); + +int __init faux_bus_init(void) +{ + int ret; + + ret = device_register(&faux_bus_root); + if (ret) { + put_device(&faux_bus_root); + return ret; + } + + ret = bus_register(&faux_bus_type); + if (ret) + goto error_bus; + + ret = driver_register(&faux_driver); + if (ret) + goto error_driver; + + return ret; + +error_driver: + bus_unregister(&faux_bus_type); + +error_bus: + device_unregister(&faux_bus_root); + return ret; +} diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig index 5b24f3959255..15eff8a4b505 100644 --- a/drivers/base/firmware_loader/Kconfig +++ b/drivers/base/firmware_loader/Kconfig @@ -3,6 +3,7 @@ menu "Firmware loader" config FW_LOADER tristate "Firmware loading facility" if EXPERT + select CRYPTO_LIB_SHA256 if FW_LOADER_DEBUG default y help This enables the firmware loading facility in the kernel. The kernel @@ -24,11 +25,31 @@ config FW_LOADER You also want to be sure to enable this built-in if you are going to enable built-in firmware (CONFIG_EXTRA_FIRMWARE). +config FW_LOADER_DEBUG + bool "Log filenames and checksums for loaded firmware" + depends on DYNAMIC_DEBUG + depends on FW_LOADER + default FW_LOADER + help + Select this option to use dynamic debug to log firmware filenames and + SHA256 checksums to the kernel log for each firmware file that is + loaded. + +config RUST_FW_LOADER_ABSTRACTIONS + bool "Rust Firmware Loader abstractions" + depends on RUST + select FW_LOADER + help + This enables the Rust abstractions for the firmware loader API. + if FW_LOADER config FW_LOADER_PAGED_BUF bool +config FW_LOADER_SYSFS + bool + config EXTRA_FIRMWARE string "Build named firmware blobs into the kernel binary" help @@ -60,6 +81,8 @@ config EXTRA_FIRMWARE image since it combines both GPL and non-GPL work. You should consult a lawyer of your own before distributing such an image. + NOTE: Compressed files are not supported in EXTRA_FIRMWARE. + config EXTRA_FIRMWARE_DIR string "Firmware blobs root directory" depends on EXTRA_FIRMWARE != "" @@ -70,6 +93,7 @@ config EXTRA_FIRMWARE_DIR config FW_LOADER_USER_HELPER bool "Enable the firmware sysfs fallback mechanism" + select FW_LOADER_SYSFS select FW_LOADER_PAGED_BUF help This option enables a sysfs loading facility to enable firmware @@ -157,17 +181,33 @@ config FW_LOADER_USER_HELPER_FALLBACK config FW_LOADER_COMPRESS bool "Enable compressed firmware support" - select FW_LOADER_PAGED_BUF - select XZ_DEC help This option enables the support for loading compressed firmware files. The caller of firmware API receives the decompressed file content. The compressed file is loaded as a fallback, only after loading the raw file failed at first. - Currently only XZ-compressed files are supported, and they have to - be compressed with either none or crc32 integrity check type (pass - "-C crc32" option to xz command). + Compressed firmware support does not apply to firmware images + that are built into the kernel image (CONFIG_EXTRA_FIRMWARE). + +if FW_LOADER_COMPRESS +config FW_LOADER_COMPRESS_XZ + bool "Enable XZ-compressed firmware support" + select FW_LOADER_PAGED_BUF + select XZ_DEC + default y + help + This option adds the support for XZ-compressed files. + The files have to be compressed with either none or crc32 + integrity check type (pass "-C crc32" option to xz command). 
+ +config FW_LOADER_COMPRESS_ZSTD + bool "Enable ZSTD-compressed firmware support" + select ZSTD_DECOMPRESS + help + This option adds the support for ZSTD-compressed files. + +endif # FW_LOADER_COMPRESS config FW_CACHE bool "Enable firmware caching during suspend" @@ -181,5 +221,19 @@ config FW_CACHE If unsure, say Y. +config FW_UPLOAD + bool "Enable users to initiate firmware updates using sysfs" + select FW_LOADER_SYSFS + select FW_LOADER_PAGED_BUF + help + Enabling this option will allow device drivers to expose a persistent + sysfs interface that allows firmware updates to be initiated from + userspace. For example, FPGA based PCIe cards load firmware and FPGA + images from local FLASH when the card boots. The images in FLASH may + be updated with new images provided by the user. Enable this device + to support cards that rely on user-initiated updates for firmware files. + + If unsure, say N. + endif # FW_LOADER endmenu diff --git a/drivers/base/firmware_loader/Makefile b/drivers/base/firmware_loader/Makefile index e87843408fe6..60d19f9e0ddc 100644 --- a/drivers/base/firmware_loader/Makefile +++ b/drivers/base/firmware_loader/Makefile @@ -6,5 +6,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o firmware_class-objs := main.o firmware_class-$(CONFIG_FW_LOADER_USER_HELPER) += fallback.o firmware_class-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += fallback_platform.o +firmware_class-$(CONFIG_FW_LOADER_SYSFS) += sysfs.o +firmware_class-$(CONFIG_FW_UPLOAD) += sysfs_upload.o obj-y += builtin/ diff --git a/drivers/base/firmware_loader/builtin/Makefile b/drivers/base/firmware_loader/builtin/Makefile index 101754ad48d9..6c067dedc01e 100644 --- a/drivers/base/firmware_loader/builtin/Makefile +++ b/drivers/base/firmware_loader/builtin/Makefile @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +obj-y += main.o # Create $(fwdir) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a # leading /, it's relative to $(srctree). -fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR)) +fwdir := $(CONFIG_EXTRA_FIRMWARE_DIR) fwdir := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir)) -obj-y := $(addsuffix .gen.o, $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE))) +firmware := $(addsuffix .gen.o, $(CONFIG_EXTRA_FIRMWARE)) +obj-y += $(firmware) FWNAME = $(patsubst $(obj)/%.gen.S,%,$@) FWSTR = $(subst $(comma),_,$(subst /,_,$(subst .,_,$(subst -,_,$(FWNAME))))) @@ -34,7 +36,7 @@ $(obj)/%.gen.S: FORCE $(call filechk,fwbin) # The .o files depend on the binaries directly; the .S files don't. 
-$(addprefix $(obj)/, $(obj-y)): $(obj)/%.gen.o: $(fwdir)/% +$(addprefix $(obj)/, $(firmware)): $(obj)/%.gen.o: $(fwdir)/% targets := $(patsubst $(obj)/%,%, \ $(shell find $(obj) -name \*.gen.S 2>/dev/null)) diff --git a/drivers/base/firmware_loader/builtin/main.c b/drivers/base/firmware_loader/builtin/main.c new file mode 100644 index 000000000000..d36befebb1b9 --- /dev/null +++ b/drivers/base/firmware_loader/builtin/main.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Builtin firmware support */ + +#include <linux/firmware.h> +#include "../firmware.h" + +/* Only if FW_LOADER=y */ +#ifdef CONFIG_FW_LOADER + +struct builtin_fw { + char *name; + void *data; + unsigned long size; +}; + +extern struct builtin_fw __start_builtin_fw[]; +extern struct builtin_fw __end_builtin_fw[]; + +static bool fw_copy_to_prealloc_buf(struct firmware *fw, + void *buf, size_t size) +{ + if (!buf) + return true; + if (size < fw->size) + return false; + memcpy(buf, fw->data, fw->size); + return true; +} + +/** + * firmware_request_builtin() - load builtin firmware + * @fw: pointer to firmware struct + * @name: name of firmware file + * + * Some use cases in the kernel have a requirement so that no memory allocator + * is involved as these calls take place early in boot process. An example is + * the x86 CPU microcode loader. In these cases all the caller wants is to see + * if the firmware was built-in and if so use it right away. This can be used + * for such cases. + * + * This looks for the firmware in the built-in kernel. Only if the kernel was + * built-in with the firmware you are looking for will this return successfully. + * + * Callers of this API do not need to use release_firmware() as the pointer to + * the firmware is expected to be provided locally on the stack of the caller. + **/ +bool firmware_request_builtin(struct firmware *fw, const char *name) +{ + struct builtin_fw *b_fw; + + if (!fw) + return false; + + for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) { + if (strcmp(name, b_fw->name) == 0) { + fw->size = b_fw->size; + fw->data = b_fw->data; + return true; + } + } + + return false; +} +EXPORT_SYMBOL_NS_GPL(firmware_request_builtin, "TEST_FIRMWARE"); + +/** + * firmware_request_builtin_buf() - load builtin firmware into optional buffer + * @fw: pointer to firmware struct + * @name: name of firmware file + * @buf: If set this lets you use a pre-allocated buffer so that the built-in + * firmware into is copied into. This field can be NULL. It is used by + * callers such as request_firmware_into_buf() and + * request_partial_firmware_into_buf() + * @size: if buf was provided, the max size of the allocated buffer available. + * If the built-in firmware does not fit into the pre-allocated @buf this + * call will fail. + * + * This looks for the firmware in the built-in kernel. Only if the kernel was + * built-in with the firmware you are looking for will this call possibly + * succeed. If you passed a @buf the firmware will be copied into it *iff* the + * built-in firmware fits into the pre-allocated buffer size specified in + * @size. + * + * This caller is to be used internally by the firmware_loader only. 
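As the firmware_request_builtin() kernel-doc above explains, the lookup involves no allocation and needs no release_firmware(). A hedged sketch of an early-boot style caller (the firmware name is purely illustrative):

/* Hedged sketch, not part of the patch: struct firmware lives on the
 * caller's stack and points directly at the built-in blob, so nothing
 * needs to be freed afterwards. */
static bool __init example_check_builtin(void)
{
	struct firmware fw;

	if (!firmware_request_builtin(&fw, "example/builtin-blob.bin"))
		return false;

	pr_info("built-in blob present, %zu bytes\n", fw.size);
	return true;
}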
+ **/ +bool firmware_request_builtin_buf(struct firmware *fw, const char *name, + void *buf, size_t size) +{ + if (!firmware_request_builtin(fw, name)) + return false; + + return fw_copy_to_prealloc_buf(fw, buf, size); +} + +bool firmware_is_builtin(const struct firmware *fw) +{ + struct builtin_fw *b_fw; + + for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) + if (fw->data == b_fw->data) + return true; + + return false; +} + +#endif diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 91899d185e31..3ef0b312ae71 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -3,12 +3,9 @@ #include <linux/types.h> #include <linux/kconfig.h> #include <linux/list.h> -#include <linux/slab.h> #include <linux/security.h> -#include <linux/highmem.h> #include <linux/umh.h> #include <linux/sysctl.h> -#include <linux/vmalloc.h> #include <linux/module.h> #include "fallback.h" @@ -18,22 +15,6 @@ * firmware fallback mechanism */ -MODULE_IMPORT_NS(FIRMWARE_LOADER_PRIVATE); - -extern struct firmware_fallback_config fw_fallback_config; - -/* These getters are vetted to use int properly */ -static inline int __firmware_loading_timeout(void) -{ - return fw_fallback_config.loading_timeout; -} - -/* These setters are vetted to use int properly */ -static void __fw_fallback_set_timeout(int timeout) -{ - fw_fallback_config.loading_timeout = timeout; -} - /* * use small loading timeout for caching devices' firmware because all these * firmware images have been loaded successfully at lease once, also system is @@ -58,56 +39,14 @@ static long firmware_loading_timeout(void) __firmware_loading_timeout() * HZ : MAX_JIFFY_OFFSET; } -static inline bool fw_sysfs_done(struct fw_priv *fw_priv) -{ - return __fw_state_check(fw_priv, FW_STATUS_DONE); -} - -static inline bool fw_sysfs_loading(struct fw_priv *fw_priv) -{ - return __fw_state_check(fw_priv, FW_STATUS_LOADING); -} - static inline int fw_sysfs_wait_timeout(struct fw_priv *fw_priv, long timeout) { return __fw_state_wait_common(fw_priv, timeout); } -struct fw_sysfs { - bool nowait; - struct device dev; - struct fw_priv *fw_priv; - struct firmware *fw; -}; - -static struct fw_sysfs *to_fw_sysfs(struct device *dev) -{ - return container_of(dev, struct fw_sysfs, dev); -} - -static void __fw_load_abort(struct fw_priv *fw_priv) -{ - /* - * There is a small window in which user can write to 'loading' - * between loading done and disappearance of 'loading' - */ - if (fw_sysfs_done(fw_priv)) - return; - - list_del_init(&fw_priv->pending_list); - fw_state_aborted(fw_priv); -} - -static void fw_load_abort(struct fw_sysfs *fw_sysfs) -{ - struct fw_priv *fw_priv = fw_sysfs->fw_priv; - - __fw_load_abort(fw_priv); -} - static LIST_HEAD(pending_fw_head); -void kill_pending_fw_fallback_reqs(bool only_kill_custom) +void kill_pending_fw_fallback_reqs(bool kill_all) { struct fw_priv *fw_priv; struct fw_priv *next; @@ -115,376 +54,14 @@ void kill_pending_fw_fallback_reqs(bool only_kill_custom) mutex_lock(&fw_lock); list_for_each_entry_safe(fw_priv, next, &pending_fw_head, pending_list) { - if (!fw_priv->need_uevent || !only_kill_custom) + if (kill_all || !fw_priv->need_uevent) __fw_load_abort(fw_priv); } - mutex_unlock(&fw_lock); -} - -static ssize_t timeout_show(struct class *class, struct class_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%d\n", __firmware_loading_timeout()); -} - -/** - * timeout_store() - set number of seconds to wait for firmware - * @class: device 
class pointer - * @attr: device attribute pointer - * @buf: buffer to scan for timeout value - * @count: number of bytes in @buf - * - * Sets the number of seconds to wait for the firmware. Once - * this expires an error will be returned to the driver and no - * firmware will be provided. - * - * Note: zero means 'wait forever'. - **/ -static ssize_t timeout_store(struct class *class, struct class_attribute *attr, - const char *buf, size_t count) -{ - int tmp_loading_timeout = simple_strtol(buf, NULL, 10); - - if (tmp_loading_timeout < 0) - tmp_loading_timeout = 0; - - __fw_fallback_set_timeout(tmp_loading_timeout); - - return count; -} -static CLASS_ATTR_RW(timeout); - -static struct attribute *firmware_class_attrs[] = { - &class_attr_timeout.attr, - NULL, -}; -ATTRIBUTE_GROUPS(firmware_class); - -static void fw_dev_release(struct device *dev) -{ - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - - kfree(fw_sysfs); -} - -static int do_firmware_uevent(struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env) -{ - if (add_uevent_var(env, "FIRMWARE=%s", fw_sysfs->fw_priv->fw_name)) - return -ENOMEM; - if (add_uevent_var(env, "TIMEOUT=%i", __firmware_loading_timeout())) - return -ENOMEM; - if (add_uevent_var(env, "ASYNC=%d", fw_sysfs->nowait)) - return -ENOMEM; - - return 0; -} - -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - int err = 0; - - mutex_lock(&fw_lock); - if (fw_sysfs->fw_priv) - err = do_firmware_uevent(fw_sysfs, env); - mutex_unlock(&fw_lock); - return err; -} - -static struct class firmware_class = { - .name = "firmware", - .class_groups = firmware_class_groups, - .dev_uevent = firmware_uevent, - .dev_release = fw_dev_release, -}; - -int register_sysfs_loader(void) -{ - return class_register(&firmware_class); -} - -void unregister_sysfs_loader(void) -{ - class_unregister(&firmware_class); -} - -static ssize_t firmware_loading_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - int loading = 0; - - mutex_lock(&fw_lock); - if (fw_sysfs->fw_priv) - loading = fw_sysfs_loading(fw_sysfs->fw_priv); - mutex_unlock(&fw_lock); - - return sysfs_emit(buf, "%d\n", loading); -} - -/** - * firmware_loading_store() - set value in the 'loading' control file - * @dev: device pointer - * @attr: device attribute pointer - * @buf: buffer to scan for loading control value - * @count: number of bytes in @buf - * - * The relevant values are: - * - * 1: Start a load, discarding any previous partial load. - * 0: Conclude the load and hand the data to the driver code. - * -1: Conclude the load with an error and discard any written data. - **/ -static ssize_t firmware_loading_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - struct fw_priv *fw_priv; - ssize_t written = count; - int loading = simple_strtol(buf, NULL, 10); - - mutex_lock(&fw_lock); - fw_priv = fw_sysfs->fw_priv; - if (fw_state_is_aborted(fw_priv)) - goto out; - - switch (loading) { - case 1: - /* discarding any previous partial load */ - if (!fw_sysfs_done(fw_priv)) { - fw_free_paged_buf(fw_priv); - fw_state_start(fw_priv); - } - break; - case 0: - if (fw_sysfs_loading(fw_priv)) { - int rc; - - /* - * Several loading requests may be pending on - * one same firmware buf, so let all requests - * see the mapped 'buf->data' once the loading - * is completed. 
- * */ - rc = fw_map_paged_buf(fw_priv); - if (rc) - dev_err(dev, "%s: map pages failed\n", - __func__); - else - rc = security_kernel_post_load_data(fw_priv->data, - fw_priv->size, - LOADING_FIRMWARE, "blob"); - - /* - * Same logic as fw_load_abort, only the DONE bit - * is ignored and we set ABORT only on failure. - */ - list_del_init(&fw_priv->pending_list); - if (rc) { - fw_state_aborted(fw_priv); - written = rc; - } else { - fw_state_done(fw_priv); - } - break; - } - fallthrough; - default: - dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading); - fallthrough; - case -1: - fw_load_abort(fw_sysfs); - break; - } -out: - mutex_unlock(&fw_lock); - return written; -} -static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); - -static void firmware_rw_data(struct fw_priv *fw_priv, char *buffer, - loff_t offset, size_t count, bool read) -{ - if (read) - memcpy(buffer, fw_priv->data + offset, count); - else - memcpy(fw_priv->data + offset, buffer, count); -} + if (kill_all) + fw_load_abort_all = true; -static void firmware_rw(struct fw_priv *fw_priv, char *buffer, - loff_t offset, size_t count, bool read) -{ - while (count) { - void *page_data; - int page_nr = offset >> PAGE_SHIFT; - int page_ofs = offset & (PAGE_SIZE-1); - int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); - - page_data = kmap(fw_priv->pages[page_nr]); - - if (read) - memcpy(buffer, page_data + page_ofs, page_cnt); - else - memcpy(page_data + page_ofs, buffer, page_cnt); - - kunmap(fw_priv->pages[page_nr]); - buffer += page_cnt; - offset += page_cnt; - count -= page_cnt; - } -} - -static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buffer, loff_t offset, size_t count) -{ - struct device *dev = kobj_to_dev(kobj); - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - struct fw_priv *fw_priv; - ssize_t ret_count; - - mutex_lock(&fw_lock); - fw_priv = fw_sysfs->fw_priv; - if (!fw_priv || fw_sysfs_done(fw_priv)) { - ret_count = -ENODEV; - goto out; - } - if (offset > fw_priv->size) { - ret_count = 0; - goto out; - } - if (count > fw_priv->size - offset) - count = fw_priv->size - offset; - - ret_count = count; - - if (fw_priv->data) - firmware_rw_data(fw_priv, buffer, offset, count, true); - else - firmware_rw(fw_priv, buffer, offset, count, true); - -out: mutex_unlock(&fw_lock); - return ret_count; -} - -static int fw_realloc_pages(struct fw_sysfs *fw_sysfs, int min_size) -{ - int err; - - err = fw_grow_paged_buf(fw_sysfs->fw_priv, - PAGE_ALIGN(min_size) >> PAGE_SHIFT); - if (err) - fw_load_abort(fw_sysfs); - return err; -} - -/** - * firmware_data_write() - write method for firmware - * @filp: open sysfs file - * @kobj: kobject for the device - * @bin_attr: bin_attr structure - * @buffer: buffer being written - * @offset: buffer offset for write in total data store area - * @count: buffer size - * - * Data written to the 'data' attribute will be later handed to - * the driver as a firmware image. 
- **/ -static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buffer, loff_t offset, size_t count) -{ - struct device *dev = kobj_to_dev(kobj); - struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); - struct fw_priv *fw_priv; - ssize_t retval; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - mutex_lock(&fw_lock); - fw_priv = fw_sysfs->fw_priv; - if (!fw_priv || fw_sysfs_done(fw_priv)) { - retval = -ENODEV; - goto out; - } - - if (fw_priv->data) { - if (offset + count > fw_priv->allocated_size) { - retval = -ENOMEM; - goto out; - } - firmware_rw_data(fw_priv, buffer, offset, count, false); - retval = count; - } else { - retval = fw_realloc_pages(fw_sysfs, offset + count); - if (retval) - goto out; - - retval = count; - firmware_rw(fw_priv, buffer, offset, count, false); - } - - fw_priv->size = max_t(size_t, offset + count, fw_priv->size); -out: - mutex_unlock(&fw_lock); - return retval; -} - -static struct bin_attribute firmware_attr_data = { - .attr = { .name = "data", .mode = 0644 }, - .size = 0, - .read = firmware_data_read, - .write = firmware_data_write, -}; - -static struct attribute *fw_dev_attrs[] = { - &dev_attr_loading.attr, - NULL -}; - -static struct bin_attribute *fw_dev_bin_attrs[] = { - &firmware_attr_data, - NULL -}; - -static const struct attribute_group fw_dev_attr_group = { - .attrs = fw_dev_attrs, - .bin_attrs = fw_dev_bin_attrs, -}; - -static const struct attribute_group *fw_dev_attr_groups[] = { - &fw_dev_attr_group, - NULL -}; - -static struct fw_sysfs * -fw_create_instance(struct firmware *firmware, const char *fw_name, - struct device *device, u32 opt_flags) -{ - struct fw_sysfs *fw_sysfs; - struct device *f_dev; - - fw_sysfs = kzalloc(sizeof(*fw_sysfs), GFP_KERNEL); - if (!fw_sysfs) { - fw_sysfs = ERR_PTR(-ENOMEM); - goto exit; - } - - fw_sysfs->nowait = !!(opt_flags & FW_OPT_NOWAIT); - fw_sysfs->fw = firmware; - f_dev = &fw_sysfs->dev; - - device_initialize(f_dev); - dev_set_name(f_dev, "%s", fw_name); - f_dev->parent = device; - f_dev->class = &firmware_class; - f_dev->groups = fw_dev_attr_groups; -exit: - return fw_sysfs; } /** @@ -513,6 +90,11 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } mutex_lock(&fw_lock); + if (fw_load_abort_all || fw_state_is_aborted(fw_priv)) { + mutex_unlock(&fw_lock); + retval = -EINTR; + goto out; + } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); @@ -535,11 +117,10 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; - else - retval = -EAGAIN; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; +out: device_del(f_dev); err_put_dev: put_device(f_dev); diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h index 3af7205b302f..ccf912bef6ca 100644 --- a/drivers/base/firmware_loader/fallback.h +++ b/drivers/base/firmware_loader/fallback.h @@ -6,42 +6,18 @@ #include <linux/device.h> #include "firmware.h" - -/** - * struct firmware_fallback_config - firmware fallback configuration settings - * - * Helps describe and fine tune the fallback mechanism. - * - * @force_sysfs_fallback: force the sysfs fallback mechanism to be used - * as if one had enabled CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y. - * Useful to help debug a CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y - * functionality on a kernel where that config entry has been disabled. 
- * @ignore_sysfs_fallback: force to disable the sysfs fallback mechanism. - * This emulates the behaviour as if we had set the kernel - * config CONFIG_FW_LOADER_USER_HELPER=n. - * @old_timeout: for internal use - * @loading_timeout: the timeout to wait for the fallback mechanism before - * giving up, in seconds. - */ -struct firmware_fallback_config { - unsigned int force_sysfs_fallback; - unsigned int ignore_sysfs_fallback; - int old_timeout; - int loading_timeout; -}; +#include "sysfs.h" #ifdef CONFIG_FW_LOADER_USER_HELPER int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, u32 opt_flags, int ret); -void kill_pending_fw_fallback_reqs(bool only_kill_custom); +void kill_pending_fw_fallback_reqs(bool kill_all); void fw_fallback_set_cache_timeout(void); void fw_fallback_set_default_timeout(void); -int register_sysfs_loader(void); -void unregister_sysfs_loader(void); #else /* CONFIG_FW_LOADER_USER_HELPER */ static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, @@ -52,18 +28,9 @@ static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name, return ret; } -static inline void kill_pending_fw_fallback_reqs(bool only_kill_custom) { } +static inline void kill_pending_fw_fallback_reqs(bool kill_all) { } static inline void fw_fallback_set_cache_timeout(void) { } static inline void fw_fallback_set_default_timeout(void) { } - -static inline int register_sysfs_loader(void) -{ - return 0; -} - -static inline void unregister_sysfs_loader(void) -{ -} #endif /* CONFIG_FW_LOADER_USER_HELPER */ #ifdef CONFIG_EFI_EMBEDDED_FIRMWARE diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index 46a731dede6f..c8afc501a8a4 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -4,6 +4,7 @@ #include <linux/kconfig.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/export.h> #include <linux/security.h> #include <linux/highmem.h> #include <linux/umh.h> @@ -21,10 +22,10 @@ struct firmware_fallback_config fw_fallback_config = { .loading_timeout = 60, .old_timeout = 60, }; -EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE); +EXPORT_SYMBOL_NS_GPL(fw_fallback_config, "FIRMWARE_LOADER_PRIVATE"); #ifdef CONFIG_SYSCTL -struct ctl_table firmware_config_table[] = { +static const struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", .data = &fw_fallback_config.force_sysfs_fallback, @@ -43,6 +44,25 @@ struct ctl_table firmware_config_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { } }; -#endif + +static struct ctl_table_header *firmware_config_sysct_table_header; +int register_firmware_config_sysctl(void) +{ + firmware_config_sysct_table_header = + register_sysctl("kernel/firmware_config", + firmware_config_table); + if (!firmware_config_sysct_table_header) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, "FIRMWARE_LOADER_PRIVATE"); + +void unregister_firmware_config_sysctl(void) +{ + unregister_sysctl_table(firmware_config_sysct_table_header); + firmware_config_sysct_table_header = NULL; +} +EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, "FIRMWARE_LOADER_PRIVATE"); + +#endif /* CONFIG_SYSCTL */ diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 63bd29fdcb9c..e891742ba264 100644 --- a/drivers/base/firmware_loader/firmware.h +++ 
b/drivers/base/firmware_loader/firmware.h @@ -9,8 +9,6 @@ #include <linux/list.h> #include <linux/completion.h> -#include <generated/utsrelease.h> - /** * enum fw_opt - options to control firmware loading behaviour * @@ -87,6 +85,8 @@ struct fw_priv { }; extern struct mutex fw_lock; +extern struct firmware_cache fw_cache; +extern bool fw_load_abort_all; static inline bool __fw_state_check(struct fw_priv *fw_priv, enum fw_status status) @@ -117,8 +117,16 @@ static inline void __fw_state_set(struct fw_priv *fw_priv, WRITE_ONCE(fw_st->status, status); - if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) + if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { +#ifdef CONFIG_FW_LOADER_USER_HELPER + /* + * Doing this here ensures that the fw_priv is deleted from + * the pending list in all abort/done paths. + */ + list_del_init(&fw_priv->pending_list); +#endif complete_all(&fw_st->completion); + } } static inline void fw_state_aborted(struct fw_priv *fw_priv) @@ -141,7 +149,39 @@ static inline void fw_state_done(struct fw_priv *fw_priv) __fw_state_set(fw_priv, FW_STATUS_DONE); } +static inline bool fw_state_is_done(struct fw_priv *fw_priv) +{ + return __fw_state_check(fw_priv, FW_STATUS_DONE); +} + +static inline bool fw_state_is_loading(struct fw_priv *fw_priv) +{ + return __fw_state_check(fw_priv, FW_STATUS_LOADING); +} + +int alloc_lookup_fw_priv(const char *fw_name, struct firmware_cache *fwc, + struct fw_priv **fw_priv, void *dbuf, size_t size, + size_t offset, u32 opt_flags); int assign_fw(struct firmware *fw, struct device *device); +void free_fw_priv(struct fw_priv *fw_priv); +void fw_state_init(struct fw_priv *fw_priv); + +#ifdef CONFIG_FW_LOADER +bool firmware_is_builtin(const struct firmware *fw); +bool firmware_request_builtin_buf(struct firmware *fw, const char *name, + void *buf, size_t size); +#else /* module case */ +static inline bool firmware_is_builtin(const struct firmware *fw) +{ + return false; +} +static inline bool firmware_request_builtin_buf(struct firmware *fw, + const char *name, + void *buf, size_t size) +{ + return false; +} +#endif #ifdef CONFIG_FW_LOADER_PAGED_BUF void fw_free_paged_buf(struct fw_priv *fw_priv); diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 4fdb8219cd08..4ebdca9e4da4 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -35,6 +35,7 @@ #include <linux/syscore_ops.h> #include <linux/reboot.h> #include <linux/security.h> +#include <linux/zstd.h> #include <linux/xz.h> #include <generated/utsrelease.h> @@ -91,68 +92,10 @@ static inline struct fw_priv *to_fw_priv(struct kref *ref) * guarding for corner cases a global lock should be OK */ DEFINE_MUTEX(fw_lock); -static struct firmware_cache fw_cache; +struct firmware_cache fw_cache; +bool fw_load_abort_all; -/* Builtin firmware support */ - -#ifdef CONFIG_FW_LOADER - -extern struct builtin_fw __start_builtin_fw[]; -extern struct builtin_fw __end_builtin_fw[]; - -static void fw_copy_to_prealloc_buf(struct firmware *fw, - void *buf, size_t size) -{ - if (!buf || size < fw->size) - return; - memcpy(buf, fw->data, fw->size); -} - -static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, - void *buf, size_t size) -{ - struct builtin_fw *b_fw; - - for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) { - if (strcmp(name, b_fw->name) == 0) { - fw->size = b_fw->size; - fw->data = b_fw->data; - fw_copy_to_prealloc_buf(fw, buf, size); - - return true; - } - } - - return false; -} 
- -static bool fw_is_builtin_firmware(const struct firmware *fw) -{ - struct builtin_fw *b_fw; - - for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) - if (fw->data == b_fw->data) - return true; - - return false; -} - -#else /* Module case - no builtin firmware support */ - -static inline bool fw_get_builtin_firmware(struct firmware *fw, - const char *name, void *buf, - size_t size) -{ - return false; -} - -static inline bool fw_is_builtin_firmware(const struct firmware *fw) -{ - return false; -} -#endif - -static void fw_state_init(struct fw_priv *fw_priv) +void fw_state_init(struct fw_priv *fw_priv) { struct fw_state *fw_st = &fw_priv->fw_st; @@ -165,7 +108,7 @@ static inline int fw_state_wait(struct fw_priv *fw_priv) return __fw_state_wait_common(fw_priv, MAX_SCHEDULE_TIMEOUT); } -static int fw_cache_piggyback_on_request(const char *name); +static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv); static struct fw_priv *__allocate_fw_priv(const char *fw_name, struct firmware_cache *fwc, @@ -222,13 +165,9 @@ static struct fw_priv *__lookup_fw_priv(const char *fw_name) } /* Returns 1 for batching firmware requests with the same name */ -static int alloc_lookup_fw_priv(const char *fw_name, - struct firmware_cache *fwc, - struct fw_priv **fw_priv, - void *dbuf, - size_t size, - size_t offset, - u32 opt_flags) +int alloc_lookup_fw_priv(const char *fw_name, struct firmware_cache *fwc, + struct fw_priv **fw_priv, void *dbuf, size_t size, + size_t offset, u32 opt_flags) { struct fw_priv *tmp; @@ -283,7 +222,7 @@ static void __free_fw_priv(struct kref *ref) kfree(fw_priv); } -static void free_fw_priv(struct fw_priv *fw_priv) +void free_fw_priv(struct fw_priv *fw_priv) { struct firmware_cache *fwc = fw_priv->fwc; spin_lock(&fwc->lock); @@ -312,6 +251,8 @@ void fw_free_paged_buf(struct fw_priv *fw_priv) fw_priv->pages = NULL; fw_priv->page_array_size = 0; fw_priv->nr_pages = 0; + fw_priv->data = NULL; + fw_priv->size = 0; } int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) @@ -364,9 +305,73 @@ int fw_map_paged_buf(struct fw_priv *fw_priv) #endif /* + * ZSTD-compressed firmware support + */ +#ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD +static int fw_decompress_zstd(struct device *dev, struct fw_priv *fw_priv, + size_t in_size, const void *in_buffer) +{ + size_t len, out_size, workspace_size; + void *workspace, *out_buf; + zstd_dctx *ctx; + int err; + + if (fw_priv->allocated_size) { + out_size = fw_priv->allocated_size; + out_buf = fw_priv->data; + } else { + zstd_frame_header params; + + if (zstd_get_frame_header(¶ms, in_buffer, in_size) || + params.frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) { + dev_dbg(dev, "%s: invalid zstd header\n", __func__); + return -EINVAL; + } + out_size = params.frameContentSize; + out_buf = vzalloc(out_size); + if (!out_buf) + return -ENOMEM; + } + + workspace_size = zstd_dctx_workspace_bound(); + workspace = kvzalloc(workspace_size, GFP_KERNEL); + if (!workspace) { + err = -ENOMEM; + goto error; + } + + ctx = zstd_init_dctx(workspace, workspace_size); + if (!ctx) { + dev_dbg(dev, "%s: failed to initialize context\n", __func__); + err = -EINVAL; + goto error; + } + + len = zstd_decompress_dctx(ctx, out_buf, out_size, in_buffer, in_size); + if (zstd_is_error(len)) { + dev_dbg(dev, "%s: failed to decompress: %d\n", __func__, + zstd_get_error_code(len)); + err = -EINVAL; + goto error; + } + + if (!fw_priv->allocated_size) + fw_priv->data = out_buf; + fw_priv->size = len; + err = 0; + + error: + kvfree(workspace); + if (err && 
!fw_priv->allocated_size) + vfree(out_buf); + return err; +} +#endif /* CONFIG_FW_LOADER_COMPRESS_ZSTD */ + +/* * XZ-compressed firmware support */ -#ifdef CONFIG_FW_LOADER_COMPRESS +#ifdef CONFIG_FW_LOADER_COMPRESS_XZ /* show an error and return the standard error code */ static int fw_decompress_xz_error(struct device *dev, enum xz_ret xz_ret) { @@ -431,11 +436,11 @@ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv, /* decompress onto the new allocated page */ page = fw_priv->pages[fw_priv->nr_pages - 1]; - xz_buf.out = kmap(page); + xz_buf.out = kmap_local_page(page); xz_buf.out_pos = 0; xz_buf.out_size = PAGE_SIZE; xz_ret = xz_dec_run(xz_dec, &xz_buf); - kunmap(page); + kunmap_local(xz_buf.out); fw_priv->size += xz_buf.out_pos; /* partial decompression means either end or error */ if (xz_buf.out_pos != PAGE_SIZE) @@ -460,7 +465,7 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, else return fw_decompress_xz_pages(dev, fw_priv, in_size, in_buffer); } -#endif /* CONFIG_FW_LOADER_COMPRESS */ +#endif /* CONFIG_FW_LOADER_COMPRESS_XZ */ /* direct firmware loading support */ static char fw_path_para[256]; @@ -489,9 +494,9 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, const void *in_buffer)) { size_t size; - int i, len; + int i, len, maxlen = 0; int rc = -ENOENT; - char *path; + char *path, *nt = NULL; size_t msize = INT_MAX; void *buffer = NULL; @@ -514,8 +519,17 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, if (!fw_path[i][0]) continue; - len = snprintf(path, PATH_MAX, "%s/%s%s", - fw_path[i], fw_priv->fw_name, suffix); + /* strip off \n from customized path */ + maxlen = strlen(fw_path[i]); + if (i == 0) { + nt = strchr(fw_path[i], '\n'); + if (nt) + maxlen = nt - fw_path[i]; + } + + len = snprintf(path, PATH_MAX, "%.*s/%s%s", + maxlen, fw_path[i], + fw_priv->fw_name, suffix); if (len >= PATH_MAX) { rc = -ENAMETOOLONG; break; @@ -537,12 +551,16 @@ fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, file_size_ptr, READING_FIRMWARE); if (rc < 0) { - if (rc != -ENOENT) - dev_warn(device, "loading %s failed with error %d\n", - path, rc); - else - dev_dbg(device, "loading %s failed for no such file or directory.\n", - path); + if (!(fw_priv->opt_flags & FW_OPT_NO_WARN)) { + if (rc != -ENOENT) + dev_warn(device, + "loading %s failed with error %d\n", + path, rc); + else + dev_dbg(device, + "loading %s failed for no such file or directory.\n", + path); + } continue; } size = rc; @@ -707,10 +725,8 @@ int assign_fw(struct firmware *fw, struct device *device) * on request firmware. 
*/ if (!(fw_priv->opt_flags & FW_OPT_NOCACHE) && - fw_priv->fwc->state == FW_LOADER_START_CACHE) { - if (fw_cache_piggyback_on_request(fw_priv->fw_name)) - kref_get(&fw_priv->ref); - } + fw_priv->fwc->state == FW_LOADER_START_CACHE) + fw_cache_piggyback_on_request(fw_priv); /* pass the pages buffer to driver at the last minute */ fw_set_page_data(fw_priv, fw); @@ -738,7 +754,7 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, return -ENOMEM; } - if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { + if (firmware_request_builtin_buf(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } @@ -783,9 +799,28 @@ static void fw_abort_batch_reqs(struct firmware *fw) return; fw_priv = fw->priv; + mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); + mutex_unlock(&fw_lock); +} + +#if defined(CONFIG_FW_LOADER_DEBUG) +#include <crypto/sha2.h> + +static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) +{ + u8 digest[SHA256_DIGEST_SIZE]; + + sha256(fw->data, fw->size, digest); + dev_dbg(device, "Loaded FW: %s, sha256: %*phN\n", + name, SHA256_DIGEST_SIZE, digest); } +#else +static void fw_log_firmware_info(const struct firmware *fw, const char *name, + struct device *device) +{} +#endif /* called from request_firmware() and request_firmware_work_func() */ static int @@ -805,41 +840,75 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } + + /* + * Reject firmware file names with ".." path components. + * There are drivers that construct firmware file names from + * device-supplied strings, and we don't want some device to be + * able to tell us "I would like to be sent my firmware from + * ../../../etc/shadow, please". + * + * This intentionally only looks at the firmware name, not at + * the firmware base directory or at symlink contents. + */ + if (name_contains_dotdot(name)) { + dev_warn(device, + "Firmware load for '%s' refused, path contains '..' component\n", + name); + ret = -EINVAL; + goto out; + } + ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); if (ret <= 0) /* error or already assigned */ goto out; - ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); + /* + * We are about to try to access the firmware file. Because we may have been + * called by a driver when serving an unrelated request from userland, we use + * the kernel credentials to read the file. + */ + scoped_with_kernel_creds() { + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); - /* Only full reads can support decompression, platform, and sysfs. */ - if (!(opt_flags & FW_OPT_PARTIAL)) - nondirect = true; + /* Only full reads can support decompression, platform, and sysfs. 
*/ + if (!(opt_flags & FW_OPT_PARTIAL)) + nondirect = true; -#ifdef CONFIG_FW_LOADER_COMPRESS - if (ret == -ENOENT && nondirect) - ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", - fw_decompress_xz); +#ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", + fw_decompress_zstd); +#endif +#ifdef CONFIG_FW_LOADER_COMPRESS_XZ + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", + fw_decompress_xz); #endif - if (ret == -ENOENT && nondirect) - ret = firmware_fallback_platform(fw->priv); + if (ret == -ENOENT && nondirect) + ret = firmware_fallback_platform(fw->priv); - if (ret) { - if (!(opt_flags & FW_OPT_NO_WARN)) - dev_warn(device, - "Direct firmware load for %s failed with error %d\n", - name, ret); - if (nondirect) - ret = firmware_fallback_sysfs(fw, name, device, - opt_flags, ret); - } else - ret = assign_fw(fw, device); + if (ret) { + if (!(opt_flags & FW_OPT_NO_WARN)) + dev_warn(device, + "Direct firmware load for %s failed with error %d\n", + name, ret); + if (nondirect) + ret = firmware_fallback_sysfs(fw, name, device, + opt_flags, ret); + } else { + ret = assign_fw(fw, device); + } + } - out: +out: if (ret < 0) { fw_abort_batch_reqs(fw); release_firmware(fw); fw = NULL; + } else { + fw_log_firmware_info(fw, name, device); } *firmware_p = fw; @@ -860,6 +929,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. + * It must not contain any ".." path components - "foo/bar..bin" is + * allowed, but "foo/../bar.bin" is not. * * Caller must hold the reference count of @device. * @@ -959,8 +1030,8 @@ EXPORT_SYMBOL_GPL(firmware_request_platform); /** * firmware_request_cache() - cache firmware for suspend so resume can use it - * @name: name of firmware file * @device: device for which firmware should be cached for + * @name: name of firmware file * * There are some devices with an optimization that enables the device to not * require loading firmware on system reboot. This optimization may still @@ -1051,7 +1122,7 @@ EXPORT_SYMBOL(request_partial_firmware_into_buf); void release_firmware(const struct firmware *fw) { if (fw) { - if (!fw_is_builtin_firmware(fw)) + if (!firmware_is_builtin(fw)) firmware_free_data(fw); kfree(fw); } @@ -1086,34 +1157,11 @@ static void request_firmware_work_func(struct work_struct *work) kfree(fw_work); } -/** - * request_firmware_nowait() - asynchronous version of request_firmware - * @module: module requesting the firmware - * @uevent: sends uevent to copy the firmware image if this flag - * is non-zero else the firmware copy must be done manually. - * @name: name of firmware file - * @device: device for which firmware is being loaded - * @gfp: allocation flags - * @context: will be passed over to @cont, and - * @fw may be %NULL if firmware request fails. - * @cont: function will be called asynchronously when the firmware - * request is over. - * - * Caller must hold the reference count of @device. - * - * Asynchronous variant of request_firmware() for user contexts: - * - sleep for as small periods as possible since it may - * increase kernel boot time of built-in device drivers - * requesting firmware in their ->probe() methods, if - * @gfp is GFP_KERNEL. - * - * - can't sleep at all if @gfp is GFP_ATOMIC. 
- **/ -int -request_firmware_nowait( + +static int _request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, - void (*cont)(const struct firmware *fw, void *context)) + void (*cont)(const struct firmware *fw, void *context), bool nowarn) { struct firmware_work *fw_work; @@ -1131,7 +1179,8 @@ request_firmware_nowait( fw_work->context = context; fw_work->cont = cont; fw_work->opt_flags = FW_OPT_NOWAIT | - (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER); + (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER) | + (nowarn ? FW_OPT_NO_WARN : 0); if (!uevent && fw_cache_is_setup(device, name)) { kfree_const(fw_work->name); @@ -1150,8 +1199,66 @@ request_firmware_nowait( schedule_work(&fw_work->work); return 0; } + +/** + * request_firmware_nowait() - asynchronous version of request_firmware + * @module: module requesting the firmware + * @uevent: sends uevent to copy the firmware image if this flag + * is non-zero else the firmware copy must be done manually. + * @name: name of firmware file + * @device: device for which firmware is being loaded + * @gfp: allocation flags + * @context: will be passed over to @cont, and + * @fw may be %NULL if firmware request fails. + * @cont: function will be called asynchronously when the firmware + * request is over. + * + * Caller must hold the reference count of @device. + * + * Asynchronous variant of request_firmware() for user contexts: + * - sleep for as small periods as possible since it may + * increase kernel boot time of built-in device drivers + * requesting firmware in their ->probe() methods, if + * @gfp is GFP_KERNEL. + * + * - can't sleep at all if @gfp is GFP_ATOMIC. + **/ +int request_firmware_nowait( + struct module *module, bool uevent, + const char *name, struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return _request_firmware_nowait(module, uevent, name, device, gfp, + context, cont, false); + +} EXPORT_SYMBOL(request_firmware_nowait); +/** + * firmware_request_nowait_nowarn() - async version of request_firmware_nowarn + * @module: module requesting the firmware + * @name: name of firmware file + * @device: device for which firmware is being loaded + * @gfp: allocation flags + * @context: will be passed over to @cont, and + * @fw may be %NULL if firmware request fails. + * @cont: function will be called asynchronously when the firmware + * request is over. + * + * Similar in function to request_firmware_nowait(), but doesn't print a warning + * when the firmware file could not be found and always sends a uevent to copy + * the firmware image. 
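A hedged usage sketch of the new helper, matching the signature defined just below; the example_* names and the notion of "optional" firmware are illustrative only:

/* Hedged sketch, not part of the patch: the completion callback runs
 * asynchronously, @fw is NULL if the file was not found (and no warning
 * is printed, which is the point of the _nowarn variant), and the
 * callback is responsible for releasing the firmware. */
static void example_fw_ready(const struct firmware *fw, void *context)
{
	struct device *dev = context;

	if (!fw) {
		dev_dbg(dev, "optional firmware not present\n");
		return;
	}

	dev_info(dev, "optional firmware loaded, %zu bytes\n", fw->size);
	release_firmware(fw);
}

static int example_request_optional_fw(struct device *dev)
{
	return firmware_request_nowait_nowarn(THIS_MODULE, "example-optional.bin",
					      dev, GFP_KERNEL, dev,
					      example_fw_ready);
}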
+ */ +int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return _request_firmware_nowait(module, FW_ACTION_UEVENT, name, device, + gfp, context, cont, true); +} +EXPORT_SYMBOL_GPL(firmware_request_nowait_nowarn); + #ifdef CONFIG_FW_CACHE static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain); @@ -1215,7 +1322,7 @@ static int uncache_firmware(const char *fw_name) pr_debug("%s: %s\n", __func__, fw_name); - if (fw_get_builtin_firmware(&fw, fw_name, NULL, 0)) + if (firmware_request_builtin(&fw, fw_name)) return 0; fw_priv = lookup_fw_priv(fw_name); @@ -1257,11 +1364,11 @@ static int __fw_entry_found(const char *name) return 0; } -static int fw_cache_piggyback_on_request(const char *name) +static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { - struct firmware_cache *fwc = &fw_cache; + const char *name = fw_priv->fw_name; + struct firmware_cache *fwc = fw_priv->fwc; struct fw_cache_entry *fce; - int ret = 0; spin_lock(&fwc->name_lock); if (__fw_entry_found(name)) @@ -1269,13 +1376,12 @@ static int fw_cache_piggyback_on_request(const char *name) fce = alloc_fw_cache_entry(name); if (fce) { - ret = 1; list_add(&fce->list, &fwc->fw_names); + kref_get(&fw_priv->ref); pr_debug("%s: fw: %s\n", __func__, name); } found: spin_unlock(&fwc->name_lock); - return ret; } static void free_fw_cache_entry(struct fw_cache_entry *fce) @@ -1444,10 +1550,10 @@ static int fw_pm_notify(struct notifier_block *notify_block, case PM_SUSPEND_PREPARE: case PM_RESTORE_PREPARE: /* - * kill pending fallback requests with a custom fallback - * to avoid stalling suspend. + * Here, kill pending fallback requests will only kill + * non-uevent firmware request to avoid stalling suspend. */ - kill_pending_fw_fallback_reqs(true); + kill_pending_fw_fallback_reqs(false); device_cache_fw_images(); break; @@ -1470,16 +1576,20 @@ static int fw_pm_notify(struct notifier_block *notify_block, } /* stop caching firmware once syscore_suspend is reached */ -static int fw_suspend(void) +static int fw_suspend(void *data) { fw_cache.state = FW_LOADER_NO_CACHE; return 0; } -static struct syscore_ops fw_syscore_ops = { +static const struct syscore_ops fw_syscore_ops = { .suspend = fw_suspend, }; +static struct syscore fw_syscore = { + .ops = &fw_syscore_ops, +}; + static int __init register_fw_pm_ops(void) { int ret; @@ -1495,20 +1605,19 @@ static int __init register_fw_pm_ops(void) if (ret) return ret; - register_syscore_ops(&fw_syscore_ops); + register_syscore(&fw_syscore); return ret; } static inline void unregister_fw_pm_ops(void) { - unregister_syscore_ops(&fw_syscore_ops); + unregister_syscore(&fw_syscore); unregister_pm_notifier(&fw_cache.pm_notify); } #else -static int fw_cache_piggyback_on_request(const char *name) +static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { - return 0; } static inline int register_fw_pm_ops(void) { @@ -1533,7 +1642,7 @@ static int fw_shutdown_notify(struct notifier_block *unused1, * Kill all pending fallback requests to avoid both stalling shutdown, * and avoid a deadlock with the usermode_lock. 
*/ - kill_pending_fw_fallback_reqs(false); + kill_pending_fw_fallback_reqs(true); return NOTIFY_DONE; } diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c new file mode 100644 index 000000000000..92e91050f96a --- /dev/null +++ b/drivers/base/firmware_loader/sysfs.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/security.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "sysfs.h" + +/* + * sysfs support for firmware loader + */ + +void __fw_load_abort(struct fw_priv *fw_priv) +{ + /* + * There is a small window in which user can write to 'loading' + * between loading done/aborted and disappearance of 'loading' + */ + if (fw_state_is_aborted(fw_priv) || fw_state_is_done(fw_priv)) + return; + + fw_state_aborted(fw_priv); +} + +#ifdef CONFIG_FW_LOADER_USER_HELPER +static ssize_t timeout_show(const struct class *class, const struct class_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%d\n", __firmware_loading_timeout()); +} + +/** + * timeout_store() - set number of seconds to wait for firmware + * @class: device class pointer + * @attr: device attribute pointer + * @buf: buffer to scan for timeout value + * @count: number of bytes in @buf + * + * Sets the number of seconds to wait for the firmware. Once + * this expires an error will be returned to the driver and no + * firmware will be provided. + * + * Note: zero means 'wait forever'. + **/ +static ssize_t timeout_store(const struct class *class, const struct class_attribute *attr, + const char *buf, size_t count) +{ + int tmp_loading_timeout; + + if (kstrtoint(buf, 10, &tmp_loading_timeout)) + return -EINVAL; + + if (tmp_loading_timeout < 0) + tmp_loading_timeout = 0; + + __fw_fallback_set_timeout(tmp_loading_timeout); + + return count; +} +static CLASS_ATTR_RW(timeout); + +static struct attribute *firmware_class_attrs[] = { + &class_attr_timeout.attr, + NULL, +}; +ATTRIBUTE_GROUPS(firmware_class); + +static int do_firmware_uevent(const struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env) +{ + if (add_uevent_var(env, "FIRMWARE=%s", fw_sysfs->fw_priv->fw_name)) + return -ENOMEM; + if (add_uevent_var(env, "TIMEOUT=%i", __firmware_loading_timeout())) + return -ENOMEM; + if (add_uevent_var(env, "ASYNC=%d", fw_sysfs->nowait)) + return -ENOMEM; + + return 0; +} + +static int firmware_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + const struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_sysfs->fw_priv) + err = do_firmware_uevent(fw_sysfs, env); + mutex_unlock(&fw_lock); + return err; +} +#endif /* CONFIG_FW_LOADER_USER_HELPER */ + +static void fw_dev_release(struct device *dev) +{ + struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + + if (fw_sysfs->fw_upload_priv) + fw_upload_free(fw_sysfs); + + kfree(fw_sysfs); +} + +static struct class firmware_class = { + .name = "firmware", +#ifdef CONFIG_FW_LOADER_USER_HELPER + .class_groups = firmware_class_groups, + .dev_uevent = firmware_uevent, +#endif + .dev_release = fw_dev_release, +}; + +int register_sysfs_loader(void) +{ + int ret = class_register(&firmware_class); + + if (ret != 0) + return ret; + return register_firmware_config_sysctl(); +} + +void unregister_sysfs_loader(void) +{ + unregister_firmware_config_sysctl(); + class_unregister(&firmware_class); +} + +static ssize_t firmware_loading_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fw_sysfs 
*fw_sysfs = to_fw_sysfs(dev); + int loading = 0; + + mutex_lock(&fw_lock); + if (fw_sysfs->fw_priv) + loading = fw_state_is_loading(fw_sysfs->fw_priv); + mutex_unlock(&fw_lock); + + return sysfs_emit(buf, "%d\n", loading); +} + +/** + * firmware_loading_store() - set value in the 'loading' control file + * @dev: device pointer + * @attr: device attribute pointer + * @buf: buffer to scan for loading control value + * @count: number of bytes in @buf + * + * The relevant values are: + * + * 1: Start a load, discarding any previous partial load. + * 0: Conclude the load and hand the data to the driver code. + * -1: Conclude the load with an error and discard any written data. + **/ +static ssize_t firmware_loading_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + struct fw_priv *fw_priv; + ssize_t written = count; + int loading; + + if (kstrtoint(buf, 10, &loading)) + return -EINVAL; + + mutex_lock(&fw_lock); + fw_priv = fw_sysfs->fw_priv; + if (fw_state_is_aborted(fw_priv) || fw_state_is_done(fw_priv)) + goto out; + + switch (loading) { + case 1: + /* discarding any previous partial load */ + fw_free_paged_buf(fw_priv); + fw_state_start(fw_priv); + break; + case 0: + if (fw_state_is_loading(fw_priv)) { + int rc; + + /* + * Several loading requests may be pending on + * one same firmware buf, so let all requests + * see the mapped 'buf->data' once the loading + * is completed. + */ + rc = fw_map_paged_buf(fw_priv); + if (rc) + dev_err(dev, "%s: map pages failed\n", + __func__); + else + rc = security_kernel_post_load_data(fw_priv->data, + fw_priv->size, + LOADING_FIRMWARE, + "blob"); + + /* + * Same logic as fw_load_abort, only the DONE bit + * is ignored and we set ABORT only on failure. + */ + if (rc) { + fw_state_aborted(fw_priv); + written = rc; + } else { + fw_state_done(fw_priv); + + /* + * If this is a user-initiated firmware upload + * then start the upload in a worker thread now. 
+ */ + rc = fw_upload_start(fw_sysfs); + if (rc) + written = rc; + } + break; + } + fallthrough; + default: + dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading); + fallthrough; + case -1: + fw_load_abort(fw_sysfs); + if (fw_sysfs->fw_upload_priv) + fw_state_init(fw_sysfs->fw_priv); + + break; + } +out: + mutex_unlock(&fw_lock); + return written; +} + +DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); + +static void firmware_rw_data(struct fw_priv *fw_priv, char *buffer, + loff_t offset, size_t count, bool read) +{ + if (read) + memcpy(buffer, fw_priv->data + offset, count); + else + memcpy(fw_priv->data + offset, buffer, count); +} + +static void firmware_rw(struct fw_priv *fw_priv, char *buffer, + loff_t offset, size_t count, bool read) +{ + while (count) { + int page_nr = offset >> PAGE_SHIFT; + int page_ofs = offset & (PAGE_SIZE - 1); + int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count); + + if (read) + memcpy_from_page(buffer, fw_priv->pages[page_nr], + page_ofs, page_cnt); + else + memcpy_to_page(fw_priv->pages[page_nr], page_ofs, + buffer, page_cnt); + + buffer += page_cnt; + offset += page_cnt; + count -= page_cnt; + } +} + +static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, + const struct bin_attribute *bin_attr, + char *buffer, loff_t offset, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + struct fw_priv *fw_priv; + ssize_t ret_count; + + mutex_lock(&fw_lock); + fw_priv = fw_sysfs->fw_priv; + if (!fw_priv || fw_state_is_done(fw_priv)) { + ret_count = -ENODEV; + goto out; + } + if (offset > fw_priv->size) { + ret_count = 0; + goto out; + } + if (count > fw_priv->size - offset) + count = fw_priv->size - offset; + + ret_count = count; + + if (fw_priv->data) + firmware_rw_data(fw_priv, buffer, offset, count, true); + else + firmware_rw(fw_priv, buffer, offset, count, true); + +out: + mutex_unlock(&fw_lock); + return ret_count; +} + +static int fw_realloc_pages(struct fw_sysfs *fw_sysfs, int min_size) +{ + int err; + + err = fw_grow_paged_buf(fw_sysfs->fw_priv, + PAGE_ALIGN(min_size) >> PAGE_SHIFT); + if (err) + fw_load_abort(fw_sysfs); + return err; +} + +/** + * firmware_data_write() - write method for firmware + * @filp: open sysfs file + * @kobj: kobject for the device + * @bin_attr: bin_attr structure + * @buffer: buffer being written + * @offset: buffer offset for write in total data store area + * @count: buffer size + * + * Data written to the 'data' attribute will be later handed to + * the driver as a firmware image. 
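Taken together with the 1 / 0 / -1 protocol documented for firmware_loading_store() above, a hedged user-space sketch of the fallback flow; the sysfs directory layout (a 'loading' and a 'data' attribute under the firmware device) is assumed from the standard interface, not shown in this diff:

/* Hedged sketch, not part of the patch: write 1 to 'loading', stream the
 * image into 'data', then write 0 to conclude the load. */
#include <stdio.h>

static int example_push_firmware(const char *sysfs_dir, const void *image, size_t len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/loading", sysfs_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs("1", f);			/* start, discarding any partial load */
	fclose(f);

	snprintf(path, sizeof(path), "%s/data", sysfs_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fwrite(image, 1, len, f);	/* image handed to the kernel */
	fclose(f);

	snprintf(path, sizeof(path), "%s/loading", sysfs_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs("0", f);			/* conclude, kernel maps the pages */
	fclose(f);
	return 0;
}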
+ **/ +static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, + const struct bin_attribute *bin_attr, + char *buffer, loff_t offset, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev); + struct fw_priv *fw_priv; + ssize_t retval; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + mutex_lock(&fw_lock); + fw_priv = fw_sysfs->fw_priv; + if (!fw_priv || fw_state_is_done(fw_priv)) { + retval = -ENODEV; + goto out; + } + + if (fw_priv->data) { + if (offset + count > fw_priv->allocated_size) { + retval = -ENOMEM; + goto out; + } + firmware_rw_data(fw_priv, buffer, offset, count, false); + retval = count; + } else { + retval = fw_realloc_pages(fw_sysfs, offset + count); + if (retval) + goto out; + + retval = count; + firmware_rw(fw_priv, buffer, offset, count, false); + } + + fw_priv->size = max_t(size_t, offset + count, fw_priv->size); +out: + mutex_unlock(&fw_lock); + return retval; +} + +static const struct bin_attribute firmware_attr_data = { + .attr = { .name = "data", .mode = 0644 }, + .size = 0, + .read = firmware_data_read, + .write = firmware_data_write, +}; + +static struct attribute *fw_dev_attrs[] = { + &dev_attr_loading.attr, +#ifdef CONFIG_FW_UPLOAD + &dev_attr_cancel.attr, + &dev_attr_status.attr, + &dev_attr_error.attr, + &dev_attr_remaining_size.attr, +#endif + NULL +}; + +static const struct bin_attribute *const fw_dev_bin_attrs[] = { + &firmware_attr_data, + NULL +}; + +static const struct attribute_group fw_dev_attr_group = { + .attrs = fw_dev_attrs, + .bin_attrs = fw_dev_bin_attrs, +#ifdef CONFIG_FW_UPLOAD + .is_visible = fw_upload_is_visible, +#endif +}; + +static const struct attribute_group *fw_dev_attr_groups[] = { + &fw_dev_attr_group, + NULL +}; + +struct fw_sysfs * +fw_create_instance(struct firmware *firmware, const char *fw_name, + struct device *device, u32 opt_flags) +{ + struct fw_sysfs *fw_sysfs; + struct device *f_dev; + + fw_sysfs = kzalloc(sizeof(*fw_sysfs), GFP_KERNEL); + if (!fw_sysfs) { + fw_sysfs = ERR_PTR(-ENOMEM); + goto exit; + } + + fw_sysfs->nowait = !!(opt_flags & FW_OPT_NOWAIT); + fw_sysfs->fw = firmware; + f_dev = &fw_sysfs->dev; + + device_initialize(f_dev); + dev_set_name(f_dev, "%s", fw_name); + f_dev->parent = device; + f_dev->class = &firmware_class; + f_dev->groups = fw_dev_attr_groups; +exit: + return fw_sysfs; +} diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h new file mode 100644 index 000000000000..1cabea544a40 --- /dev/null +++ b/drivers/base/firmware_loader/sysfs.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __FIRMWARE_SYSFS_H +#define __FIRMWARE_SYSFS_H + +#include <linux/device.h> + +#include "firmware.h" + +MODULE_IMPORT_NS("FIRMWARE_LOADER_PRIVATE"); + +extern struct firmware_fallback_config fw_fallback_config; +extern struct device_attribute dev_attr_loading; + +#ifdef CONFIG_FW_LOADER_USER_HELPER +/** + * struct firmware_fallback_config - firmware fallback configuration settings + * + * Helps describe and fine tune the fallback mechanism. + * + * @force_sysfs_fallback: force the sysfs fallback mechanism to be used + * as if one had enabled CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y. + * Useful to help debug a CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y + * functionality on a kernel where that config entry has been disabled. + * @ignore_sysfs_fallback: force to disable the sysfs fallback mechanism. + * This emulates the behaviour as if we had set the kernel + * config CONFIG_FW_LOADER_USER_HELPER=n. 
+ * @old_timeout: for internal use + * @loading_timeout: the timeout to wait for the fallback mechanism before + * giving up, in seconds. + */ +struct firmware_fallback_config { + unsigned int force_sysfs_fallback; + unsigned int ignore_sysfs_fallback; + int old_timeout; + int loading_timeout; +}; + +/* These getters are vetted to use int properly */ +static inline int __firmware_loading_timeout(void) +{ + return fw_fallback_config.loading_timeout; +} + +/* These setters are vetted to use int properly */ +static inline void __fw_fallback_set_timeout(int timeout) +{ + fw_fallback_config.loading_timeout = timeout; +} +#endif + +#ifdef CONFIG_FW_LOADER_SYSFS +int register_sysfs_loader(void); +void unregister_sysfs_loader(void); +#if defined(CONFIG_FW_LOADER_USER_HELPER) && defined(CONFIG_SYSCTL) +int register_firmware_config_sysctl(void); +void unregister_firmware_config_sysctl(void); +#else +static inline int register_firmware_config_sysctl(void) +{ + return 0; +} + +static inline void unregister_firmware_config_sysctl(void) { } +#endif /* CONFIG_FW_LOADER_USER_HELPER && CONFIG_SYSCTL */ +#else /* CONFIG_FW_LOADER_SYSFS */ +static inline int register_sysfs_loader(void) +{ + return 0; +} + +static inline void unregister_sysfs_loader(void) +{ +} +#endif /* CONFIG_FW_LOADER_SYSFS */ + +struct fw_sysfs { + bool nowait; + struct device dev; + struct fw_priv *fw_priv; + struct firmware *fw; + void *fw_upload_priv; +}; +#define to_fw_sysfs(__dev) container_of_const(__dev, struct fw_sysfs, dev) + +void __fw_load_abort(struct fw_priv *fw_priv); + +static inline void fw_load_abort(struct fw_sysfs *fw_sysfs) +{ + struct fw_priv *fw_priv = fw_sysfs->fw_priv; + + __fw_load_abort(fw_priv); +} + +struct fw_sysfs * +fw_create_instance(struct firmware *firmware, const char *fw_name, + struct device *device, u32 opt_flags); + +#ifdef CONFIG_FW_UPLOAD +extern struct device_attribute dev_attr_status; +extern struct device_attribute dev_attr_error; +extern struct device_attribute dev_attr_cancel; +extern struct device_attribute dev_attr_remaining_size; + +int fw_upload_start(struct fw_sysfs *fw_sysfs); +void fw_upload_free(struct fw_sysfs *fw_sysfs); +umode_t fw_upload_is_visible(struct kobject *kobj, struct attribute *attr, int n); +#else +static inline int fw_upload_start(struct fw_sysfs *fw_sysfs) +{ + return 0; +} + +static inline void fw_upload_free(struct fw_sysfs *fw_sysfs) +{ +} +#endif + +#endif /* __FIRMWARE_SYSFS_H */ diff --git a/drivers/base/firmware_loader/sysfs_upload.c b/drivers/base/firmware_loader/sysfs_upload.c new file mode 100644 index 000000000000..c3797b93c5f5 --- /dev/null +++ b/drivers/base/firmware_loader/sysfs_upload.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include "sysfs_upload.h" + +/* + * Support for user-space to initiate a firmware upload to a device. 
+ */ + +static const char * const fw_upload_prog_str[] = { + [FW_UPLOAD_PROG_IDLE] = "idle", + [FW_UPLOAD_PROG_RECEIVING] = "receiving", + [FW_UPLOAD_PROG_PREPARING] = "preparing", + [FW_UPLOAD_PROG_TRANSFERRING] = "transferring", + [FW_UPLOAD_PROG_PROGRAMMING] = "programming" +}; + +static const char * const fw_upload_err_str[] = { + [FW_UPLOAD_ERR_NONE] = "none", + [FW_UPLOAD_ERR_HW_ERROR] = "hw-error", + [FW_UPLOAD_ERR_TIMEOUT] = "timeout", + [FW_UPLOAD_ERR_CANCELED] = "user-abort", + [FW_UPLOAD_ERR_BUSY] = "device-busy", + [FW_UPLOAD_ERR_INVALID_SIZE] = "invalid-file-size", + [FW_UPLOAD_ERR_RW_ERROR] = "read-write-error", + [FW_UPLOAD_ERR_WEAROUT] = "flash-wearout", + [FW_UPLOAD_ERR_FW_INVALID] = "firmware-invalid", +}; + +static const char *fw_upload_progress(struct device *dev, + enum fw_upload_prog prog) +{ + const char *status = "unknown-status"; + + if (prog < FW_UPLOAD_PROG_MAX) + status = fw_upload_prog_str[prog]; + else + dev_err(dev, "Invalid status during secure update: %d\n", prog); + + return status; +} + +static const char *fw_upload_error(struct device *dev, + enum fw_upload_err err_code) +{ + const char *error = "unknown-error"; + + if (err_code < FW_UPLOAD_ERR_MAX) + error = fw_upload_err_str[err_code]; + else + dev_err(dev, "Invalid error code during secure update: %d\n", + err_code); + + return error; +} + +static ssize_t +status_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct fw_upload_priv *fwlp = to_fw_sysfs(dev)->fw_upload_priv; + + return sysfs_emit(buf, "%s\n", fw_upload_progress(dev, fwlp->progress)); +} +DEVICE_ATTR_RO(status); + +static ssize_t +error_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct fw_upload_priv *fwlp = to_fw_sysfs(dev)->fw_upload_priv; + int ret; + + mutex_lock(&fwlp->lock); + + if (fwlp->progress != FW_UPLOAD_PROG_IDLE) + ret = -EBUSY; + else if (!fwlp->err_code) + ret = 0; + else + ret = sysfs_emit(buf, "%s:%s\n", + fw_upload_progress(dev, fwlp->err_progress), + fw_upload_error(dev, fwlp->err_code)); + + mutex_unlock(&fwlp->lock); + + return ret; +} +DEVICE_ATTR_RO(error); + +static ssize_t cancel_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fw_upload_priv *fwlp = to_fw_sysfs(dev)->fw_upload_priv; + int ret = count; + bool cancel; + + if (kstrtobool(buf, &cancel) || !cancel) + return -EINVAL; + + mutex_lock(&fwlp->lock); + if (fwlp->progress == FW_UPLOAD_PROG_IDLE) { + mutex_unlock(&fwlp->lock); + return -ENODEV; + } + + fwlp->ops->cancel(fwlp->fw_upload); + mutex_unlock(&fwlp->lock); + + return ret; +} +DEVICE_ATTR_WO(cancel); + +static ssize_t remaining_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fw_upload_priv *fwlp = to_fw_sysfs(dev)->fw_upload_priv; + + return sysfs_emit(buf, "%u\n", fwlp->remaining_size); +} +DEVICE_ATTR_RO(remaining_size); + +umode_t +fw_upload_is_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + static struct fw_sysfs *fw_sysfs; + + fw_sysfs = to_fw_sysfs(kobj_to_dev(kobj)); + + if (fw_sysfs->fw_upload_priv || attr == &dev_attr_loading.attr) + return attr->mode; + + return 0; +} + +static void fw_upload_update_progress(struct fw_upload_priv *fwlp, + enum fw_upload_prog new_progress) +{ + mutex_lock(&fwlp->lock); + fwlp->progress = new_progress; + mutex_unlock(&fwlp->lock); +} + +static void fw_upload_set_error(struct fw_upload_priv *fwlp, + enum fw_upload_err err_code) +{ + mutex_lock(&fwlp->lock); + fwlp->err_progress = fwlp->progress; + 
fwlp->err_code = err_code;
+	mutex_unlock(&fwlp->lock);
+}
+
+static void fw_upload_prog_complete(struct fw_upload_priv *fwlp)
+{
+	mutex_lock(&fwlp->lock);
+	fwlp->progress = FW_UPLOAD_PROG_IDLE;
+	mutex_unlock(&fwlp->lock);
+}
+
+static void fw_upload_main(struct work_struct *work)
+{
+	struct fw_upload_priv *fwlp;
+	struct fw_sysfs *fw_sysfs;
+	u32 written = 0, offset = 0;
+	enum fw_upload_err ret;
+	struct device *fw_dev;
+	struct fw_upload *fwl;
+
+	fwlp = container_of(work, struct fw_upload_priv, work);
+	fwl = fwlp->fw_upload;
+	fw_sysfs = (struct fw_sysfs *)fwl->priv;
+	fw_dev = &fw_sysfs->dev;
+
+	fw_upload_update_progress(fwlp, FW_UPLOAD_PROG_PREPARING);
+	ret = fwlp->ops->prepare(fwl, fwlp->data, fwlp->remaining_size);
+	if (ret != FW_UPLOAD_ERR_NONE) {
+		fw_upload_set_error(fwlp, ret);
+		goto putdev_exit;
+	}
+
+	fw_upload_update_progress(fwlp, FW_UPLOAD_PROG_TRANSFERRING);
+	while (fwlp->remaining_size) {
+		ret = fwlp->ops->write(fwl, fwlp->data, offset,
+					fwlp->remaining_size, &written);
+		if (ret != FW_UPLOAD_ERR_NONE || !written) {
+			if (ret == FW_UPLOAD_ERR_NONE) {
+				dev_warn(fw_dev, "write-op wrote zero data\n");
+				ret = FW_UPLOAD_ERR_RW_ERROR;
+			}
+			fw_upload_set_error(fwlp, ret);
+			goto done;
+		}
+
+		fwlp->remaining_size -= written;
+		offset += written;
+	}
+
+	fw_upload_update_progress(fwlp, FW_UPLOAD_PROG_PROGRAMMING);
+	ret = fwlp->ops->poll_complete(fwl);
+	if (ret != FW_UPLOAD_ERR_NONE)
+		fw_upload_set_error(fwlp, ret);
+
+done:
+	if (fwlp->ops->cleanup)
+		fwlp->ops->cleanup(fwl);
+
+putdev_exit:
+	put_device(fw_dev->parent);
+
+	/*
+	 * Note: fwlp->remaining_size is left unmodified here to provide
+	 * additional information on errors. It will be reinitialized when
+	 * the next firmware upload begins.
+	 */
+	mutex_lock(&fw_lock);
+	fw_free_paged_buf(fw_sysfs->fw_priv);
+	fw_state_init(fw_sysfs->fw_priv);
+	mutex_unlock(&fw_lock);
+	fwlp->data = NULL;
+	fw_upload_prog_complete(fwlp);
+}
+
+/*
+ * Start a worker thread to upload data to the parent driver.
+ * Must be called with fw_lock held.
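fw_upload_main() above fixes the callback contract for lower-level drivers: prepare() once, write() repeatedly until the image is consumed, poll_complete() to wait for programming, with cleanup() invoked on any path that got past prepare(). A minimal sketch of such an ops table (the chunk size, image limit and helper behaviour are assumptions, not taken from this patch):

#define EX_CHUNK	SZ_4K		/* assumed per-write granularity */
#define EX_MAX_IMAGE	SZ_1M		/* assumed device limit */

static enum fw_upload_err ex_prepare(struct fw_upload *fwl,
				     const u8 *data, u32 size)
{
	/* Validate the image and switch the device into programming mode. */
	if (!size || size > EX_MAX_IMAGE)
		return FW_UPLOAD_ERR_INVALID_SIZE;
	return FW_UPLOAD_ERR_NONE;
}

static enum fw_upload_err ex_write(struct fw_upload *fwl, const u8 *data,
				   u32 offset, u32 size, u32 *written)
{
	u32 len = min_t(u32, size, EX_CHUNK);

	/* device-specific transfer of data[offset..offset + len) elided */
	*written = len;
	return FW_UPLOAD_ERR_NONE;
}

static enum fw_upload_err ex_poll_complete(struct fw_upload *fwl)
{
	/* Block until the device reports the staged image was programmed. */
	return FW_UPLOAD_ERR_NONE;
}

static void ex_cancel(struct fw_upload *fwl)
{
	/* Ask the hardware to abort; the transfer callbacks should then
	 * return FW_UPLOAD_ERR_CANCELED. */
}

static const struct fw_upload_ops ex_fw_ops = {
	.prepare	= ex_prepare,
	.write		= ex_write,
	.poll_complete	= ex_poll_complete,
	.cancel		= ex_cancel,
};
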
+ */ +int fw_upload_start(struct fw_sysfs *fw_sysfs) +{ + struct fw_priv *fw_priv = fw_sysfs->fw_priv; + struct device *fw_dev = &fw_sysfs->dev; + struct fw_upload_priv *fwlp; + + if (!fw_sysfs->fw_upload_priv) + return 0; + + if (!fw_priv->size) { + fw_free_paged_buf(fw_priv); + fw_state_init(fw_sysfs->fw_priv); + return 0; + } + + fwlp = fw_sysfs->fw_upload_priv; + mutex_lock(&fwlp->lock); + + /* Do not interfere with an on-going fw_upload */ + if (fwlp->progress != FW_UPLOAD_PROG_IDLE) { + mutex_unlock(&fwlp->lock); + return -EBUSY; + } + + get_device(fw_dev->parent); /* released in fw_upload_main */ + + fwlp->progress = FW_UPLOAD_PROG_RECEIVING; + fwlp->err_code = 0; + fwlp->remaining_size = fw_priv->size; + fwlp->data = fw_priv->data; + + pr_debug("%s: fw-%s fw_priv=%p data=%p size=%u\n", + __func__, fw_priv->fw_name, + fw_priv, fw_priv->data, + (unsigned int)fw_priv->size); + + queue_work(system_long_wq, &fwlp->work); + mutex_unlock(&fwlp->lock); + + return 0; +} + +void fw_upload_free(struct fw_sysfs *fw_sysfs) +{ + struct fw_upload_priv *fw_upload_priv = fw_sysfs->fw_upload_priv; + + free_fw_priv(fw_sysfs->fw_priv); + kfree(fw_upload_priv->fw_upload); + kfree(fw_upload_priv); +} + +/** + * firmware_upload_register() - register for the firmware upload sysfs API + * @module: kernel module of this device + * @parent: parent device instantiating firmware upload + * @name: firmware name to be associated with this device + * @ops: pointer to structure of firmware upload ops + * @dd_handle: pointer to parent driver private data + * + * @name must be unique among all users of firmware upload. The firmware + * sysfs files for this device will be found at /sys/class/firmware/@name. + * + * Return: struct fw_upload pointer or ERR_PTR() + * + **/ +struct fw_upload * +firmware_upload_register(struct module *module, struct device *parent, + const char *name, const struct fw_upload_ops *ops, + void *dd_handle) +{ + u32 opt_flags = FW_OPT_NOCACHE; + struct fw_upload *fw_upload; + struct fw_upload_priv *fw_upload_priv; + struct fw_sysfs *fw_sysfs; + struct fw_priv *fw_priv; + struct device *fw_dev; + int ret; + + if (!name || name[0] == '\0') + return ERR_PTR(-EINVAL); + + if (!ops || !ops->cancel || !ops->prepare || + !ops->write || !ops->poll_complete) { + dev_err(parent, "Attempt to register without all required ops\n"); + return ERR_PTR(-EINVAL); + } + + if (!try_module_get(module)) + return ERR_PTR(-EFAULT); + + fw_upload = kzalloc(sizeof(*fw_upload), GFP_KERNEL); + if (!fw_upload) { + ret = -ENOMEM; + goto exit_module_put; + } + + fw_upload_priv = kzalloc(sizeof(*fw_upload_priv), GFP_KERNEL); + if (!fw_upload_priv) { + ret = -ENOMEM; + goto free_fw_upload; + } + + fw_upload_priv->fw_upload = fw_upload; + fw_upload_priv->ops = ops; + mutex_init(&fw_upload_priv->lock); + fw_upload_priv->module = module; + fw_upload_priv->name = name; + fw_upload_priv->err_code = 0; + fw_upload_priv->progress = FW_UPLOAD_PROG_IDLE; + INIT_WORK(&fw_upload_priv->work, fw_upload_main); + fw_upload->dd_handle = dd_handle; + + fw_sysfs = fw_create_instance(NULL, name, parent, opt_flags); + if (IS_ERR(fw_sysfs)) { + ret = PTR_ERR(fw_sysfs); + goto free_fw_upload_priv; + } + fw_upload->priv = fw_sysfs; + fw_sysfs->fw_upload_priv = fw_upload_priv; + fw_dev = &fw_sysfs->dev; + + ret = alloc_lookup_fw_priv(name, &fw_cache, &fw_priv, NULL, 0, 0, + FW_OPT_NOCACHE); + if (ret != 0) { + if (ret > 0) + ret = -EINVAL; + goto free_fw_sysfs; + } + fw_priv->is_paged_buf = true; + fw_sysfs->fw_priv = fw_priv; + + ret = 
device_add(fw_dev); + if (ret) { + dev_err(fw_dev, "%s: device_register failed\n", __func__); + put_device(fw_dev); + goto exit_module_put; + } + + return fw_upload; + +free_fw_sysfs: + kfree(fw_sysfs); + +free_fw_upload_priv: + kfree(fw_upload_priv); + +free_fw_upload: + kfree(fw_upload); + +exit_module_put: + module_put(module); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(firmware_upload_register); + +/** + * firmware_upload_unregister() - Unregister firmware upload interface + * @fw_upload: pointer to struct fw_upload + **/ +void firmware_upload_unregister(struct fw_upload *fw_upload) +{ + struct fw_sysfs *fw_sysfs = fw_upload->priv; + struct fw_upload_priv *fw_upload_priv = fw_sysfs->fw_upload_priv; + struct module *module = fw_upload_priv->module; + + mutex_lock(&fw_upload_priv->lock); + if (fw_upload_priv->progress == FW_UPLOAD_PROG_IDLE) { + mutex_unlock(&fw_upload_priv->lock); + goto unregister; + } + + fw_upload_priv->ops->cancel(fw_upload); + mutex_unlock(&fw_upload_priv->lock); + + /* Ensure lower-level device-driver is finished */ + flush_work(&fw_upload_priv->work); + +unregister: + device_unregister(&fw_sysfs->dev); + module_put(module); +} +EXPORT_SYMBOL_GPL(firmware_upload_unregister); diff --git a/drivers/base/firmware_loader/sysfs_upload.h b/drivers/base/firmware_loader/sysfs_upload.h new file mode 100644 index 000000000000..31931ff7808a --- /dev/null +++ b/drivers/base/firmware_loader/sysfs_upload.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SYSFS_UPLOAD_H +#define __SYSFS_UPLOAD_H + +#include <linux/device.h> + +#include "sysfs.h" + +/** + * enum fw_upload_prog - firmware upload progress codes + * @FW_UPLOAD_PROG_IDLE: there is no firmware upload in progress + * @FW_UPLOAD_PROG_RECEIVING: worker thread is receiving firmware data + * @FW_UPLOAD_PROG_PREPARING: target device is preparing for firmware upload + * @FW_UPLOAD_PROG_TRANSFERRING: data is being copied to the device + * @FW_UPLOAD_PROG_PROGRAMMING: device is performing the firmware update + * @FW_UPLOAD_PROG_MAX: Maximum progress code marker + */ +enum fw_upload_prog { + FW_UPLOAD_PROG_IDLE, + FW_UPLOAD_PROG_RECEIVING, + FW_UPLOAD_PROG_PREPARING, + FW_UPLOAD_PROG_TRANSFERRING, + FW_UPLOAD_PROG_PROGRAMMING, + FW_UPLOAD_PROG_MAX +}; + +struct fw_upload_priv { + struct fw_upload *fw_upload; + struct module *module; + const char *name; + const struct fw_upload_ops *ops; + struct mutex lock; /* protect data structure contents */ + struct work_struct work; + const u8 *data; /* pointer to update data */ + u32 remaining_size; /* size remaining to transfer */ + enum fw_upload_prog progress; + enum fw_upload_prog err_progress; /* progress at time of failure */ + enum fw_upload_err err_code; /* security manager error code */ +}; + +#endif /* __SYSFS_UPLOAD_H */ diff --git a/drivers/base/init.c b/drivers/base/init.c index a9f57c22fb9e..9d2b06d65dfc 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/memory.h> #include <linux/of.h> +#include <linux/backing-dev.h> #include "base.h" @@ -20,6 +21,7 @@ void __init driver_init(void) { /* These are the core pieces */ + bdi_init(&noop_backing_dev_info); devtmpfs_init(); devices_init(); buses_init(); @@ -30,10 +32,12 @@ void __init driver_init(void) /* These are also core pieces, but must come after the * core core pieces. 
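Returning to the firmware upload API completed above: firmware_upload_register() is typically called from a lower-level driver's probe path and paired with firmware_upload_unregister() on removal. A hedged sketch reusing the ex_fw_ops table from the earlier example (driver, device and name are made up):

struct ex_priv {
	struct fw_upload *fwl;
};

static int ex_probe(struct platform_device *pdev)
{
	struct ex_priv *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/* "ex-fw" must be unique; it shows up as /sys/class/firmware/ex-fw/ */
	priv->fwl = firmware_upload_register(THIS_MODULE, &pdev->dev, "ex-fw",
					     &ex_fw_ops, priv);
	if (IS_ERR(priv->fwl))
		return PTR_ERR(priv->fwl);

	platform_set_drvdata(pdev, priv);
	return 0;
}

static void ex_remove(struct platform_device *pdev)
{
	struct ex_priv *priv = platform_get_drvdata(pdev);

	firmware_upload_unregister(priv->fwl);
}
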
*/ + faux_bus_init(); of_core_init(); platform_bus_init(); auxiliary_bus_init(); - cpu_dev_init(); memory_dev_init(); + node_dev_init(); + cpu_dev_init(); container_dev_init(); } diff --git a/drivers/base/isa.c b/drivers/base/isa.c index aa4737667026..bfd9215c9070 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -23,7 +23,7 @@ struct isa_dev { #define to_isa_dev(x) container_of((x), struct isa_dev, dev) -static int isa_bus_match(struct device *dev, struct device_driver *driver) +static int isa_bus_match(struct device *dev, const struct device_driver *driver) { struct isa_driver *isa_driver = to_isa_driver(driver); @@ -46,14 +46,12 @@ static int isa_bus_probe(struct device *dev) return 0; } -static int isa_bus_remove(struct device *dev) +static void isa_bus_remove(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; if (isa_driver && isa_driver->remove) isa_driver->remove(dev, to_isa_dev(dev)->id); - - return 0; } static void isa_bus_shutdown(struct device *dev) @@ -84,7 +82,7 @@ static int isa_bus_resume(struct device *dev) return 0; } -static struct bus_type isa_bus_type = { +static const struct bus_type isa_bus_type = { .name = "isa", .match = isa_bus_match, .probe = isa_bus_probe, @@ -151,11 +149,8 @@ int isa_register_driver(struct isa_driver *isa_driver, unsigned int ndev) break; } - if (isa_dev->dev.platform_data) { - isa_dev->next = isa_driver->devices; - isa_driver->devices = &isa_dev->dev; - } else - device_unregister(&isa_dev->dev); + isa_dev->next = isa_driver->devices; + isa_driver->devices = &isa_dev->dev; } if (!error && !isa_driver->devices) diff --git a/drivers/base/map.c b/drivers/base/map.c index 5650ab2b247a..83aeb09ca161 100644 --- a/drivers/base/map.c +++ b/drivers/base/map.c @@ -33,9 +33,9 @@ int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, struct module *module, kobj_probe_t *probe, int (*lock)(dev_t, void *), void *data) { - unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; - unsigned index = MAJOR(dev); - unsigned i; + unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; + unsigned int index = MAJOR(dev); + unsigned int i; struct probe *p; if (n > 255) @@ -67,9 +67,9 @@ int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range) { - unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; - unsigned index = MAJOR(dev); - unsigned i; + unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; + unsigned int index = MAJOR(dev); + unsigned int i; struct probe *found = NULL; if (n > 255) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index aa31a21f33d7..751f248ca4a8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -22,6 +22,7 @@ #include <linux/stat.h> #include <linux/slab.h> #include <linux/xarray.h> +#include <linux/export.h> #include <linux/atomic.h> #include <linux/uaccess.h> @@ -48,27 +49,13 @@ int mhp_online_type_from_str(const char *str) #define to_memory_block(dev) container_of(dev, struct memory_block, dev) -static int sections_per_block; - -static inline unsigned long memory_block_id(unsigned long section_nr) -{ - return section_nr / sections_per_block; -} - -static inline unsigned long pfn_to_block_id(unsigned long pfn) -{ - return memory_block_id(pfn_to_section_nr(pfn)); -} - -static inline unsigned long phys_to_block_id(unsigned long phys) -{ - return pfn_to_block_id(PFN_DOWN(phys)); -} +int sections_per_block; +EXPORT_SYMBOL(sections_per_block); static int memory_subsys_online(struct 
device *dev); static int memory_subsys_offline(struct device *dev); -static struct bus_type memory_subsys = { +static const struct bus_type memory_subsys = { .name = MEMORY_CLASS_NAME, .dev_name = MEMORY_CLASS_NAME, .online = memory_subsys_online, @@ -82,6 +69,12 @@ static struct bus_type memory_subsys = { */ static DEFINE_XARRAY(memory_blocks); +/* + * Memory groups, indexed by memory group id (mgid). + */ +static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC); +#define MEMORY_GROUP_MARK_DYNAMIC XA_MARK_1 + static BLOCKING_NOTIFIER_HEAD(memory_chain); int register_memory_notifier(struct notifier_block *nb) @@ -99,28 +92,75 @@ EXPORT_SYMBOL(unregister_memory_notifier); static void memory_block_release(struct device *dev) { struct memory_block *mem = to_memory_block(dev); - + /* Verify that the altmap is freed */ + WARN_ON(mem->altmap); kfree(mem); } + +/* Max block size to be set by memory_block_advise_max_size */ +static unsigned long memory_block_advised_size; +static bool memory_block_advised_size_queried; + +/** + * memory_block_advise_max_size() - advise memory hotplug on the max suggested + * block size, usually for alignment. + * @size: suggestion for maximum block size. must be aligned on power of 2. + * + * Early boot software (pre-allocator init) may advise archs on the max block + * size. This value can only decrease after initialization, as the intent is + * to identify the largest supported alignment for all sources. + * + * Use of this value is arch-defined, as is min/max block size. + * + * Return: 0 on success + * -EINVAL if size is 0 or not pow2 aligned + * -EBUSY if value has already been probed + */ +int __init memory_block_advise_max_size(unsigned long size) +{ + if (!size || !is_power_of_2(size)) + return -EINVAL; + + if (memory_block_advised_size_queried) + return -EBUSY; + + if (memory_block_advised_size) + memory_block_advised_size = min(memory_block_advised_size, size); + else + memory_block_advised_size = size; + + return 0; +} + +/** + * memory_block_advised_max_size() - query advised max hotplug block size. + * + * After the first call, the value can never change. Callers looking for the + * actual block size should use memory_block_size_bytes. This interface is + * intended for use by arch-init when initializing the hotplug block size. + * + * Return: advised size in bytes, or 0 if never set. + */ +unsigned long memory_block_advised_max_size(void) +{ + memory_block_advised_size_queried = true; + return memory_block_advised_size; +} + unsigned long __weak memory_block_size_bytes(void) { return MIN_MEMORY_BLOCK_SIZE; } EXPORT_SYMBOL_GPL(memory_block_size_bytes); -/* - * Show the first physical section index (number) of this memory block. 
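The advise/query pair added above is intended for very early (pre-allocator) code: one or more callers advise the largest alignment they can tolerate, and the architecture's memory_block_size_bytes() override clamps its choice to that value, after which the advice is frozen. A sketch under assumed sizes (SZ_1G and SZ_128M are illustrative):

/* early boot, before the block size is first queried */
static int __init ex_align_init(void)
{
	if (memory_block_advise_max_size(SZ_1G))
		pr_warn("memory block size advice rejected\n");
	return 0;
}

/* arch override of the __weak memory_block_size_bytes() above */
unsigned long memory_block_size_bytes(void)
{
	unsigned long advised = memory_block_advised_max_size();
	unsigned long size = SZ_128M;		/* assumed arch default */

	/* once queried here, the advised value can no longer change */
	if (advised)
		size = min(size, advised);
	return size;
}
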
- */ +/* Show the memory block ID, relative to the memory block size */ static ssize_t phys_index_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); - unsigned long phys_index; - - phys_index = mem->start_section_nr / sections_per_block; - return sysfs_emit(buf, "%08lx\n", phys_index); + return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr)); } /* @@ -158,26 +198,42 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, break; default: WARN_ON(1); - return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state); + return sysfs_emit(buf, "ERROR-UNKNOWN-%d\n", mem->state); } return sysfs_emit(buf, "%s\n", output); } -int memory_notify(unsigned long val, void *v) +int memory_notify(enum memory_block_state state, void *v) { - return blocking_notifier_call_chain(&memory_chain, val, v); + return blocking_notifier_call_chain(&memory_chain, state, v); } +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +static unsigned long memblk_nr_poison(struct memory_block *mem); +#else +static inline unsigned long memblk_nr_poison(struct memory_block *mem) +{ + return 0; +} +#endif + +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_online(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; + unsigned long nr_vmemmap_pages = 0; struct zone *zone; int ret; - zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages); + if (memblk_nr_poison(mem)) + return -EHWPOISON; + + zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group, + start_pfn, nr_pages); /* * Although vmemmap pages have a different lifecycle than the pages @@ -186,18 +242,22 @@ static int memory_block_online(struct memory_block *mem) * stage helps to keep accounting easier to follow - e.g vmemmaps * belong to the same zone as the memory they backed. */ + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->free; + + mem_hotplug_begin(); if (nr_vmemmap_pages) { ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) - return ret; + goto out; } ret = online_pages(start_pfn + nr_vmemmap_pages, - nr_pages - nr_vmemmap_pages, zone); + nr_pages - nr_vmemmap_pages, zone, mem->group); if (ret) { if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); - return ret; + goto out; } /* @@ -205,40 +265,56 @@ static int memory_block_online(struct memory_block *mem) * now already properly populated. */ if (nr_vmemmap_pages) - adjust_present_page_count(zone, nr_vmemmap_pages); + adjust_present_page_count(pfn_to_page(start_pfn), mem->group, + nr_vmemmap_pages); + mem->zone = zone; +out: + mem_hotplug_done(); return ret; } +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_offline(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; - struct zone *zone; + unsigned long nr_vmemmap_pages = 0; int ret; + if (!mem->zone) + return -EINVAL; + /* * Unaccount before offlining, such that unpopulated zone and kthreads * can properly be torn down in offline_pages(). 
*/ - if (nr_vmemmap_pages) { - zone = page_zone(pfn_to_page(start_pfn)); - adjust_present_page_count(zone, -nr_vmemmap_pages); - } + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->free; + + mem_hotplug_begin(); + if (nr_vmemmap_pages) + adjust_present_page_count(pfn_to_page(start_pfn), mem->group, + -nr_vmemmap_pages); ret = offline_pages(start_pfn + nr_vmemmap_pages, - nr_pages - nr_vmemmap_pages); + nr_pages - nr_vmemmap_pages, mem->zone, mem->group); if (ret) { /* offline_pages() failed. Account back. */ if (nr_vmemmap_pages) - adjust_present_page_count(zone, nr_vmemmap_pages); - return ret; + adjust_present_page_count(pfn_to_page(start_pfn), + mem->group, nr_vmemmap_pages); + goto out; } if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); + mem->zone = NULL; +out: + mem_hotplug_done(); return ret; } @@ -374,12 +450,13 @@ static ssize_t phys_device_show(struct device *dev, #ifdef CONFIG_MEMORY_HOTREMOVE static int print_allowed_zone(char *buf, int len, int nid, + struct memory_group *group, unsigned long start_pfn, unsigned long nr_pages, int online_type, struct zone *default_zone) { struct zone *zone; - zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); + zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages); if (zone == default_zone) return 0; @@ -392,9 +469,10 @@ static ssize_t valid_zones_show(struct device *dev, struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + struct memory_group *group = mem->group; struct zone *default_zone; - int len = 0; - int nid; + int nid = mem->nid; + int len; /* * Check the existing zone. Make sure that we do that only on the @@ -402,27 +480,21 @@ static ssize_t valid_zones_show(struct device *dev, */ if (mem->state == MEM_ONLINE) { /* - * The block contains more than one zone can not be offlined. - * This can happen e.g. for ZONE_DMA and ZONE_DMA32 + * If !mem->zone, the memory block spans multiple zones and + * cannot get offlined. */ - default_zone = test_pages_in_a_zone(start_pfn, - start_pfn + nr_pages); - if (!default_zone) - return sysfs_emit(buf, "%s\n", "none"); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); - goto out; + return sysfs_emit(buf, "%s\n", + mem->zone ? 
mem->zone->name : "none"); } - nid = mem->nid; - default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn, - nr_pages); + default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group, + start_pfn, nr_pages); - len += sysfs_emit_at(buf, len, "%s", default_zone->name); - len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + len = sysfs_emit(buf, "%s", default_zone->name); + len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, default_zone); - len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages, + len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, default_zone); -out: len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -453,7 +525,7 @@ static ssize_t auto_online_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - online_type_to_str[mhp_default_online_type]); + online_type_to_str[mhp_get_default_online_type()]); } static ssize_t auto_online_blocks_store(struct device *dev, @@ -465,12 +537,22 @@ static ssize_t auto_online_blocks_store(struct device *dev, if (online_type < 0) return -EINVAL; - mhp_default_online_type = online_type; + mhp_set_default_online_type(online_type); return count; } static DEVICE_ATTR_RW(auto_online_blocks); +#ifdef CONFIG_CRASH_HOTPLUG +#include <linux/kexec.h> +static ssize_t crash_hotplug_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", crash_check_hotplug_support()); +} +static DEVICE_ATTR_RO(crash_hotplug); +#endif + /* * Some architectures will have custom drivers to do this, and * will not need to do it from userspace. The fake hot-add code @@ -546,7 +628,9 @@ static ssize_t hard_offline_page_store(struct device *dev, if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - ret = memory_failure(pfn, 0); + ret = memory_failure(pfn, MF_SW_SIMULATED); + if (ret == -EOPNOTSUPP) + ret = 0; return ret ? ret : count; } @@ -565,7 +649,7 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) * * Called under device_hotplug_lock. */ -static struct memory_block *find_memory_block_by_id(unsigned long block_id) +struct memory_block *find_memory_block_by_id(unsigned long block_id) { struct memory_block *mem; @@ -578,9 +662,9 @@ static struct memory_block *find_memory_block_by_id(unsigned long block_id) /* * Called under device_hotplug_lock. 
*/ -struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(unsigned long section_nr) { - unsigned long block_id = memory_block_id(__section_nr(section)); + unsigned long block_id = memory_block_id(section_nr); return find_memory_block_by_id(block_id); } @@ -605,11 +689,7 @@ static const struct attribute_group *memory_memblk_attr_groups[] = { NULL, }; -/* - * register_memory - Setup a sysfs device for a memory block - */ -static -int register_memory(struct memory_block *memory) +static int __add_memory_block(struct memory_block *memory) { int ret; @@ -626,15 +706,91 @@ int register_memory(struct memory_block *memory) } ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, GFP_KERNEL)); - if (ret) { - put_device(&memory->dev); + if (ret) device_unregister(&memory->dev); - } + return ret; } -static int init_memory_block(unsigned long block_id, unsigned long state, - unsigned long nr_vmemmap_pages) +static struct zone *early_node_zone_for_memory_block(struct memory_block *mem, + int nid) +{ + const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); + const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + struct zone *zone, *matching_zone = NULL; + pg_data_t *pgdat = NODE_DATA(nid); + int i; + + /* + * This logic only works for early memory, when the applicable zones + * already span the memory block. We don't expect overlapping zones on + * a single node for early memory. So if we're told that some PFNs + * of a node fall into this memory block, we can assume that all node + * zones that intersect with the memory block are actually applicable. + * No need to look at the memmap. + */ + for (i = 0; i < MAX_NR_ZONES; i++) { + zone = pgdat->node_zones + i; + if (!populated_zone(zone)) + continue; + if (!zone_intersects(zone, start_pfn, nr_pages)) + continue; + if (!matching_zone) { + matching_zone = zone; + continue; + } + /* Spans multiple zones ... */ + matching_zone = NULL; + break; + } + return matching_zone; +} + +#ifdef CONFIG_NUMA +/** + * memory_block_add_nid_early() - Indicate that early system RAM falling into + * this memory block device (partially) belongs + * to the given node. + * @mem: The memory block device. + * @nid: The node id. + * + * Indicate that early system RAM falling into this memory block (partially) + * belongs to the given node. This will also properly set/adjust mem->zone based + * on the zone ranges of the given node. + * + * Memory hotplug handles this on memory block creation, where we can only have + * a single nid span a memory block. + */ +void memory_block_add_nid_early(struct memory_block *mem, int nid) +{ + if (mem->nid != nid) { + /* + * For early memory we have to determine the zone when setting + * the node id and handle multiple nodes spanning a single + * memory block by indicate via zone == NULL that we're not + * dealing with a single zone. So if we're setting the node id + * the first time, determine if there is a single zone. If we're + * setting the node id a second time to a different node, + * invalidate the single detected zone. + */ + if (mem->nid == NUMA_NO_NODE) + mem->zone = early_node_zone_for_memory_block(mem, nid); + else + mem->zone = NULL; + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. If we span multiple nodes (not applicable + * to hotplugged memory), zone == NULL will prohibit memory offlining + * and consequently unplug. 
+ */ + mem->nid = nid; + } +} +#endif + +static int add_memory_block(unsigned long block_id, int nid, unsigned long state, + struct vmem_altmap *altmap, + struct memory_group *group) { struct memory_block *mem; int ret = 0; @@ -650,37 +806,45 @@ static int init_memory_block(unsigned long block_id, unsigned long state, mem->start_section_nr = block_id * sections_per_block; mem->state = state; - mem->nid = NUMA_NO_NODE; - mem->nr_vmemmap_pages = nr_vmemmap_pages; - - ret = register_memory(mem); + mem->nid = nid; + mem->altmap = altmap; + INIT_LIST_HEAD(&mem->group_next); - return ret; -} +#ifndef CONFIG_NUMA + if (state == MEM_ONLINE) + /* + * MEM_ONLINE at this point implies early memory. With NUMA, + * we'll determine the zone when setting the node id via + * memory_block_add_nid(). Memory hotplug updated the zone + * manually when memory onlining/offlining succeeds. + */ + mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE); +#endif /* CONFIG_NUMA */ -static int add_memory_block(unsigned long base_section_nr) -{ - int section_count = 0; - unsigned long nr; + ret = __add_memory_block(mem); + if (ret) + return ret; - for (nr = base_section_nr; nr < base_section_nr + sections_per_block; - nr++) - if (present_section_nr(nr)) - section_count++; + if (group) { + mem->group = group; + list_add(&mem->group_next, &group->memory_blocks); + } - if (section_count == 0) - return 0; - return init_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, 0); + return 0; } -static void unregister_memory(struct memory_block *memory) +static void remove_memory_block(struct memory_block *memory) { if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) return; WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL); + if (memory->group) { + list_del(&memory->group_next); + memory->group = NULL; + } + /* drop the ref. we got via find_memory_block() */ put_device(&memory->dev); device_unregister(&memory->dev); @@ -694,7 +858,8 @@ static void unregister_memory(struct memory_block *memory) * Called under device_hotplug_lock. 
*/ int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages) + int nid, struct vmem_altmap *altmap, + struct memory_group *group) { const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); @@ -707,7 +872,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages); + ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group); if (ret) break; } @@ -718,7 +883,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, mem = find_memory_block_by_id(block_id); if (WARN_ON_ONCE(!mem)) continue; - unregister_memory(mem); + remove_memory_block(mem); } } return ret; @@ -746,17 +911,12 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) mem = find_memory_block_by_id(block_id); if (WARN_ON_ONCE(!mem)) continue; + num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem)); unregister_memory_block_under_nodes(mem); - unregister_memory(mem); + remove_memory_block(mem); } } -/* return true if the memory block is offlined, otherwise, return false */ -bool is_memblock_offlined(struct memory_block *mem) -{ - return mem->state == MEM_OFFLINE; -} - static struct attribute *memory_root_attrs[] = { #ifdef CONFIG_ARCH_MEMORY_PROBE &dev_attr_probe.attr, @@ -769,6 +929,9 @@ static struct attribute *memory_root_attrs[] = { &dev_attr_block_size_bytes.attr, &dev_attr_auto_online_blocks.attr, +#ifdef CONFIG_CRASH_HOTPLUG + &dev_attr_crash_hotplug.attr, +#endif NULL }; @@ -789,7 +952,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -802,15 +965,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. */ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } @@ -891,3 +1062,189 @@ int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) return bus_for_each_dev(&memory_subsys, NULL, &cb_data, for_each_memory_block_cb); } + +/* + * This is an internal helper to unify allocation and initialization of + * memory groups. Note that the passed memory group will be copied to a + * dynamically allocated memory group. After this call, the passed + * memory group should no longer be used. 
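As a usage sketch of the memory-group interface documented just below (memory_group_register_static(), memory_group_register_dynamic() and memory_group_unregister()), a hotplug driver registers one group per device, hands the returned id to the hotplug core, and unregisters it once every block of the group is gone. The device size and resource name here are placeholders, and the MHP_NID_IS_MGID convention is how existing in-tree users such as dax/kmem pass the group id rather than something introduced by this patch:

static int ex_add_device_memory(int nid, u64 start, u64 size)
{
	int mgid, rc;

	/* one static group per DIMM-like device: added and removed as a whole */
	mgid = memory_group_register_static(nid, PHYS_PFN(size));
	if (mgid < 0)
		return mgid;

	/* resource names for driver-managed memory follow "System RAM (...)" */
	rc = add_memory_driver_managed(mgid, start, size,
				       "System RAM (example)", MHP_NID_IS_MGID);
	if (rc)
		memory_group_unregister(mgid);
	return rc;
}
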
+ */ +static int memory_group_register(struct memory_group group) +{ + struct memory_group *new_group; + uint32_t mgid; + int ret; + + if (!node_possible(group.nid)) + return -EINVAL; + + new_group = kzalloc(sizeof(group), GFP_KERNEL); + if (!new_group) + return -ENOMEM; + *new_group = group; + INIT_LIST_HEAD(&new_group->memory_blocks); + + ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b, + GFP_KERNEL); + if (ret) { + kfree(new_group); + return ret; + } else if (group.is_dynamic) { + xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC); + } + return mgid; +} + +/** + * memory_group_register_static() - Register a static memory group. + * @nid: The node id. + * @max_pages: The maximum number of pages we'll have in this static memory + * group. + * + * Register a new static memory group and return the memory group id. + * All memory in the group belongs to a single unit, such as a DIMM. All + * memory belonging to a static memory group is added in one go to be removed + * in one go -- it's static. + * + * Returns an error if out of memory, if the node id is invalid, if no new + * memory groups can be registered, or if max_pages is invalid (0). Otherwise, + * returns the new memory group id. + */ +int memory_group_register_static(int nid, unsigned long max_pages) +{ + struct memory_group group = { + .nid = nid, + .s = { + .max_pages = max_pages, + }, + }; + + if (!max_pages) + return -EINVAL; + return memory_group_register(group); +} +EXPORT_SYMBOL_GPL(memory_group_register_static); + +/** + * memory_group_register_dynamic() - Register a dynamic memory group. + * @nid: The node id. + * @unit_pages: Unit in pages in which is memory added/removed in this dynamic + * memory group. + * + * Register a new dynamic memory group and return the memory group id. + * Memory within a dynamic memory group is added/removed dynamically + * in unit_pages. + * + * Returns an error if out of memory, if the node id is invalid, if no new + * memory groups can be registered, or if unit_pages is invalid (0, not a + * power of two, smaller than a single memory block). Otherwise, returns the + * new memory group id. + */ +int memory_group_register_dynamic(int nid, unsigned long unit_pages) +{ + struct memory_group group = { + .nid = nid, + .is_dynamic = true, + .d = { + .unit_pages = unit_pages, + }, + }; + + if (!unit_pages || !is_power_of_2(unit_pages) || + unit_pages < PHYS_PFN(memory_block_size_bytes())) + return -EINVAL; + return memory_group_register(group); +} +EXPORT_SYMBOL_GPL(memory_group_register_dynamic); + +/** + * memory_group_unregister() - Unregister a memory group. + * @mgid: the memory group id + * + * Unregister a memory group. If any memory block still belongs to this + * memory group, unregistering will fail. + * + * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some + * memory blocks still belong to this memory group and returns 0 if + * unregistering succeeded. + */ +int memory_group_unregister(int mgid) +{ + struct memory_group *group; + + if (mgid < 0) + return -EINVAL; + + group = xa_load(&memory_groups, mgid); + if (!group) + return -EINVAL; + if (!list_empty(&group->memory_blocks)) + return -EBUSY; + xa_erase(&memory_groups, mgid); + kfree(group); + return 0; +} +EXPORT_SYMBOL_GPL(memory_group_unregister); + +/* + * This is an internal helper only to be used in core memory hotplug code to + * lookup a memory group. 
We don't care about locking, as we don't expect a + * memory group to get unregistered while adding memory to it -- because + * the group and the memory is managed by the same driver. + */ +struct memory_group *memory_group_find_by_id(int mgid) +{ + return xa_load(&memory_groups, mgid); +} + +/* + * This is an internal helper only to be used in core memory hotplug code to + * walk all dynamic memory groups excluding a given memory group, either + * belonging to a specific node, or belonging to any node. + */ +int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, + struct memory_group *excluded, void *arg) +{ + struct memory_group *group; + unsigned long index; + int ret = 0; + + xa_for_each_marked(&memory_groups, index, group, + MEMORY_GROUP_MARK_DYNAMIC) { + if (group == excluded) + continue; +#ifdef CONFIG_NUMA + if (nid != NUMA_NO_NODE && group->nid != nid) + continue; +#endif /* CONFIG_NUMA */ + ret = func(group, arg); + if (ret) + break; + } + return ret; +} + +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +void memblk_nr_poison_inc(unsigned long pfn) +{ + const unsigned long block_id = pfn_to_block_id(pfn); + struct memory_block *mem = find_memory_block_by_id(block_id); + + if (mem) + atomic_long_inc(&mem->nr_hwpoison); +} + +void memblk_nr_poison_sub(unsigned long pfn, long i) +{ + const unsigned long block_id = pfn_to_block_id(pfn); + struct memory_block *mem = find_memory_block_by_id(block_id); + + if (mem) + atomic_long_sub(i, &mem->nr_hwpoison); +} + +static unsigned long memblk_nr_poison(struct memory_block *mem) +{ + return atomic_long_read(&mem->nr_hwpoison); +} +#endif diff --git a/drivers/base/module.c b/drivers/base/module.c index 46ad4d636731..218aaa096455 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -9,7 +9,7 @@ #include <linux/string.h> #include "base.h" -static char *make_driver_name(struct device_driver *drv) +static char *make_driver_name(const struct device_driver *drv) { char *driver_name; @@ -30,46 +30,67 @@ static void module_create_drivers_dir(struct module_kobject *mk) mutex_unlock(&drivers_dir_mutex); } -void module_add_driver(struct module *mod, struct device_driver *drv) +int module_add_driver(struct module *mod, const struct device_driver *drv) { char *driver_name; - int no_warn; struct module_kobject *mk = NULL; + int ret; if (!drv) - return; + return 0; if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } if (!mk) - return; + return 0; + + ret = sysfs_create_link(&drv->p->kobj, &mk->kobj, "module"); + if (ret) + return ret; - /* Don't check return codes; these calls are idempotent */ - no_warn = sysfs_create_link(&drv->p->kobj, &mk->kobj, "module"); driver_name = make_driver_name(drv); - if (driver_name) { - module_create_drivers_dir(mk); - no_warn = sysfs_create_link(mk->drivers_dir, &drv->p->kobj, - driver_name); - kfree(driver_name); + if (!driver_name) { + ret = -ENOMEM; + goto out_remove_kobj; } + + module_create_drivers_dir(mk); + if (!mk->drivers_dir) { + ret 
= -EINVAL; + goto out_free_driver_name; + } + + ret = sysfs_create_link(mk->drivers_dir, &drv->p->kobj, driver_name); + if (ret) + goto out_remove_drivers_dir; + + kfree(driver_name); + + return 0; + +out_remove_drivers_dir: + sysfs_remove_link(mk->drivers_dir, driver_name); + +out_free_driver_name: + kfree(driver_name); + +out_remove_kobj: + sysfs_remove_link(&drv->p->kobj, "module"); + return ret; } -void module_remove_driver(struct device_driver *drv) +void module_remove_driver(const struct device_driver *drv) { struct module_kobject *mk = NULL; char *driver_name; diff --git a/drivers/base/node.c b/drivers/base/node.c index 4a4ae868ad9f..00cf4532f121 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/memory.h> +#include <linux/mempolicy.h> #include <linux/vmstat.h> #include <linux/notifier.h> #include <linux/node.h> @@ -20,49 +21,54 @@ #include <linux/pm_runtime.h> #include <linux/swap.h> #include <linux/slab.h> +#include <linux/memblock.h> -static struct bus_type node_subsys = { +static const struct bus_type node_subsys = { .name = "node", .dev_name = "node", }; - -static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) +static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, + const struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { - ssize_t n; - cpumask_var_t mask; + struct device *dev = kobj_to_dev(kobj); struct node *node_dev = to_node(dev); - - /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ - BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); + cpumask_var_t mask; + ssize_t n; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return 0; cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); - n = cpumap_print_to_pagebuf(list, buf, mask); + n = cpumap_print_bitmask_to_buf(buf, mask, off, count); free_cpumask_var(mask); return n; } -static inline ssize_t cpumap_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static const BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); + +static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, + const struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { - return node_read_cpumap(dev, false, buf); -} + struct device *dev = kobj_to_dev(kobj); + struct node *node_dev = to_node(dev); + cpumask_var_t mask; + ssize_t n; -static DEVICE_ATTR_RO(cpumap); + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return 0; -static inline ssize_t cpulist_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return node_read_cpumap(dev, true, buf); + cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); + n = cpumap_print_list_to_buf(buf, mask, off, count); + free_cpumask_var(mask); + + return n; } -static DEVICE_ATTR_RO(cpulist); +static const BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); /** * struct node_access_nodes - Access class device to hold user visible @@ -70,14 +76,14 @@ static DEVICE_ATTR_RO(cpulist); * @dev: Device for this memory access class * @list_node: List element in the node's access list * @access: The access class rank - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates */ struct node_access_nodes { struct device dev; struct list_head list_node; - unsigned access; + unsigned int access; #ifdef CONFIG_HMEM_REPORTING - struct node_hmem_attrs hmem_attrs; + struct access_coordinate coord; #endif }; #define to_access_nodes(dev) 
container_of(dev, struct node_access_nodes, dev) @@ -106,6 +112,27 @@ static const struct attribute_group *node_access_node_groups[] = { NULL, }; +#ifdef CONFIG_MEMORY_HOTPLUG +static BLOCKING_NOTIFIER_HEAD(node_chain); + +int register_node_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&node_chain, nb); +} +EXPORT_SYMBOL(register_node_notifier); + +void unregister_node_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&node_chain, nb); +} +EXPORT_SYMBOL(unregister_node_notifier); + +int node_notify(unsigned long val, void *v) +{ + return blocking_notifier_call_chain(&node_chain, val, v); +} +#endif + static void node_remove_accesses(struct node *node) { struct node_access_nodes *c, *cnext; @@ -122,7 +149,7 @@ static void node_access_release(struct device *dev) } static struct node_access_nodes *node_init_node_access(struct node *node, - unsigned access) + enum access_coordinate_class access) { struct node_access_nodes *access_node; struct device *dev; @@ -157,15 +184,15 @@ free: } #ifdef CONFIG_HMEM_REPORTING -#define ACCESS_ATTR(name) \ -static ssize_t name##_show(struct device *dev, \ +#define ACCESS_ATTR(property) \ +static ssize_t property##_show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ return sysfs_emit(buf, "%u\n", \ - to_access_nodes(dev)->hmem_attrs.name); \ + to_access_nodes(dev)->coord.property); \ } \ -static DEVICE_ATTR_RO(name) +static DEVICE_ATTR_RO(property) ACCESS_ATTR(read_bandwidth); ACCESS_ATTR(read_latency); @@ -183,11 +210,11 @@ static struct attribute *access_attrs[] = { /** * node_set_perf_attrs - Set the performance values for given access class * @nid: Node identifier to be set - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates * @access: The access class the for the given attributes */ -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, - unsigned access) +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) { struct node_access_nodes *c; struct node *node; @@ -201,7 +228,7 @@ void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, if (!c) return; - c->hmem_attrs = *hmem_attrs; + c->coord = *coord; for (i = 0; access_attrs[i] != NULL; i++) { if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], "initiators")) { @@ -210,7 +237,54 @@ void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, break; } } + + /* When setting CPU access coordinates, update mempolicy */ + if (access == ACCESS_COORDINATE_CPU) { + if (mempolicy_set_node_perf(nid, coord)) { + pr_info("failed to set mempolicy attrs for node %d\n", + nid); + } + } } +EXPORT_SYMBOL_GPL(node_set_perf_attrs); + +/** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + 
sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); /** * struct node_cache_info - Internal tracking for memory node caches @@ -239,12 +313,14 @@ CACHE_ATTR(size, "%llu") CACHE_ATTR(line_size, "%u") CACHE_ATTR(indexing, "%u") CACHE_ATTR(write_policy, "%u") +CACHE_ATTR(address_mode, "%#x") static struct attribute *cache_attrs[] = { &dev_attr_indexing.attr, &dev_attr_size.attr, &dev_attr_line_size.attr, &dev_attr_write_policy.attr, + &dev_attr_address_mode.attr, NULL, }; ATTRIBUTE_GROUPS(cache); @@ -429,6 +505,7 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d ShadowCallStack:%8lu kB\n" #endif "Node %d PageTables: %8lu kB\n" + "Node %d SecPageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" "Node %d WritebackTmp: %8lu kB\n" @@ -440,8 +517,11 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d AnonHugePages: %8lu kB\n" "Node %d ShmemHugePages: %8lu kB\n" "Node %d ShmemPmdMapped: %8lu kB\n" - "Node %d FileHugePages: %8lu kB\n" - "Node %d FilePmdMapped: %8lu kB\n" + "Node %d FileHugePages: %8lu kB\n" + "Node %d FilePmdMapped: %8lu kB\n" +#endif +#ifdef CONFIG_UNACCEPTED_MEMORY + "Node %d Unaccepted: %8lu kB\n" #endif , nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), @@ -455,9 +535,10 @@ static ssize_t node_read_meminfo(struct device *dev, nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif nid, K(node_page_state(pgdat, NR_PAGETABLE)), + nid, K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), + nid, 0UL, + nid, 0UL, nid, 0UL, - nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), nid, K(sreclaimable + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), nid, K(sreclaimable + sunreclaimable), @@ -471,6 +552,10 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_FILE_THPS)), nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED)) #endif +#ifdef CONFIG_UNACCEPTED_MEMORY + , + nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED)) +#endif ); len += hugetlb_report_node_meminfo(buf, len, nid); return len; @@ -557,115 +642,38 @@ static ssize_t node_read_distance(struct device *dev, static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); static struct attribute *node_dev_attrs[] = { - &dev_attr_cpumap.attr, - &dev_attr_cpulist.attr, &dev_attr_meminfo.attr, &dev_attr_numastat.attr, &dev_attr_distance.attr, &dev_attr_vmstat.attr, NULL }; -ATTRIBUTE_GROUPS(node_dev); - -#ifdef CONFIG_HUGETLBFS -/* - * hugetlbfs per node attributes registration interface: - * When/if hugetlb[fs] subsystem initializes [sometime after this module], - * it will register its per node attributes for all online nodes with - * memory. It will also call register_hugetlbfs_with_node(), below, to - * register its attribute registration functions with this node driver. - * Once these hooks have been initialized, the node driver will call into - * the hugetlb module to [un]register attributes for hot-plugged nodes. 
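Looking back at node_set_perf_attrs() above: a platform driver (HMAT or CXL code, for instance) fills a struct access_coordinate and reports it for one access class, which populates the initiator attributes of that node. The numbers below are placeholders:

static void ex_report_node_perf(unsigned int nid)
{
	struct access_coordinate coord = {
		.read_bandwidth		= 10000,	/* illustrative values */
		.write_bandwidth	= 10000,
		.read_latency		= 150,
		.write_latency		= 200,
	};

	/* shows up under /sys/devices/system/node/nodeN/accessM/initiators/ */
	node_set_perf_attrs(nid, &coord, ACCESS_COORDINATE_CPU);
}
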
- */ -static node_registration_func_t __hugetlb_register_node; -static node_registration_func_t __hugetlb_unregister_node; -static inline bool hugetlb_register_node(struct node *node) -{ - if (__hugetlb_register_node && - node_state(node->dev.id, N_MEMORY)) { - __hugetlb_register_node(node); - return true; - } - return false; -} - -static inline void hugetlb_unregister_node(struct node *node) -{ - if (__hugetlb_unregister_node) - __hugetlb_unregister_node(node); -} +static const struct bin_attribute *node_dev_bin_attrs[] = { + &bin_attr_cpumap, + &bin_attr_cpulist, + NULL +}; -void register_hugetlbfs_with_node(node_registration_func_t doregister, - node_registration_func_t unregister) -{ - __hugetlb_register_node = doregister; - __hugetlb_unregister_node = unregister; -} -#else -static inline void hugetlb_register_node(struct node *node) {} +static const struct attribute_group node_dev_group = { + .attrs = node_dev_attrs, + .bin_attrs = node_dev_bin_attrs, +}; -static inline void hugetlb_unregister_node(struct node *node) {} +static const struct attribute_group *node_dev_groups[] = { + &node_dev_group, +#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP + &arch_node_dev_group, #endif - -static void node_device_release(struct device *dev) -{ - struct node *node = to_node(dev); - -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) - /* - * We schedule the work only when a memory section is - * onlined/offlined on this node. When we come here, - * all the memory on this node has been offlined, - * so we won't enqueue new work to this work. - * - * The work is using node->node_work, so we should - * flush work before freeing the memory. - */ - flush_work(&node->node_work); +#ifdef CONFIG_MEMORY_FAILURE + &memory_failure_attr_group, #endif - kfree(node); -} - -/* - * register_node - Setup a sysfs device for a node. - * @num - Node number to use when creating the device. - * - * Initialize and register the node device. - */ -static int register_node(struct node *node, int num) -{ - int error; - - node->dev.id = num; - node->dev.bus = &node_subsys; - node->dev.release = node_device_release; - node->dev.groups = node_dev_groups; - error = device_register(&node->dev); - - if (error) - put_device(&node->dev); - else { - hugetlb_register_node(node); - - compaction_register_node(node); - } - return error; -} + NULL +}; -/** - * unregister_node - unregister a node device - * @node: node going away - * - * Unregisters a node device @node. All the devices on the node must be - * unregistered before calling this function. 
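The node notifier chain exported earlier in this patch (register_node_notifier()/node_notify()) follows the usual blocking-notifier pattern, so a subscriber only needs a notifier_block. A minimal sketch; the callback body is deliberately left empty because the event values are not shown in this hunk:

static int ex_node_callback(struct notifier_block *nb, unsigned long action,
			    void *arg)
{
	/* react to node transitions delivered through node_notify() */
	return NOTIFY_OK;
}

static struct notifier_block ex_node_nb = {
	.notifier_call = ex_node_callback,
};

static int __init ex_node_init(void)
{
	return register_node_notifier(&ex_node_nb);
}
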
- */ -void unregister_node(struct node *node) +static void node_device_release(struct device *dev) { - hugetlb_unregister_node(node); /* no-op, if memoryless node */ - node_remove_accesses(node); - node_remove_caches(node); - device_unregister(&node->dev); + kfree(to_node(dev)); } struct node *node_devices[MAX_NUMNODES]; @@ -711,7 +719,7 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) */ int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned access) + enum access_coordinate_class access) { struct node *init_node, *targ_node; struct node_access_nodes *initiator, *target; @@ -765,29 +773,12 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE -static int __ref get_nid_for_pfn(unsigned long pfn) -{ - if (!pfn_valid_within(pfn)) - return -1; -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT - if (system_state < SYSTEM_RUNNING) - return early_pfn_to_nid(pfn); -#endif - return pfn_to_nid(pfn); -} - +#ifdef CONFIG_MEMORY_HOTPLUG static void do_register_memory_block_under_node(int nid, struct memory_block *mem_blk) { int ret; - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. - */ - mem_blk->nid = nid; - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); @@ -806,46 +797,6 @@ static void do_register_memory_block_under_node(int nid, ret); } -/* register memory section under specified node if it spans that node */ -static int register_mem_block_under_node_early(struct memory_block *mem_blk, - void *arg) -{ - unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; - unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); - unsigned long end_pfn = start_pfn + memory_block_pfns - 1; - int nid = *(int *)arg; - unsigned long pfn; - - for (pfn = start_pfn; pfn <= end_pfn; pfn++) { - int page_nid; - - /* - * memory block could have several absent sections from start. - * skip pfn range from absent section - */ - if (!pfn_in_present_section(pfn)) { - pfn = round_down(pfn + PAGES_PER_SECTION, - PAGES_PER_SECTION) - 1; - continue; - } - - /* - * We need to check if page belongs to nid only at the boot - * case because node's ranges can be interleaved. - */ - page_nid = get_nid_for_pfn(pfn); - if (page_nid < 0) - continue; - if (page_nid != nid) - continue; - - do_register_memory_block_under_node(nid, mem_blk); - return 0; - } - /* mem section does not span the specified node */ - return 0; -} - /* * During hotplug we know that all pages in the memory block belong to the same * node. 
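For completeness, the initiator/target linkage whose signature is updated above (register_memory_node_under_compute_node()) is established by the same platform code once it knows the best CPU node for a memory-only target; a hedged wrapper:

static int ex_link_target_to_initiator(unsigned int mem_nid,
				       unsigned int cpu_nid)
{
	/* creates the accessN initiators/targets symlinks between the two nodes */
	return register_memory_node_under_compute_node(mem_nid, cpu_nid,
						       ACCESS_COORDINATE_CPU);
}
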
@@ -874,99 +825,77 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, - enum meminit_context context) +/* register all memory blocks under the corresponding nodes */ +static void register_memory_blocks_under_nodes(void) { - walk_memory_blocks_func_t func; + struct memblock_region *r; - if (context == MEMINIT_HOTPLUG) - func = register_mem_block_under_node_hotplug; - else - func = register_mem_block_under_node_early; + for_each_mem_region(r) { + const unsigned long start_block_id = phys_to_block_id(r->base); + const unsigned long end_block_id = phys_to_block_id(r->base + r->size - 1); + const int nid = memblock_get_region_node(r); + unsigned long block_id; - walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), - (void *)&nid, func); - return; -} - -#ifdef CONFIG_HUGETLBFS -/* - * Handle per node hstate attribute [un]registration on transistions - * to/from memoryless state. - */ -static void node_hugetlb_work(struct work_struct *work) -{ - struct node *node = container_of(work, struct node, node_work); + if (!node_online(nid)) + continue; - /* - * We only get here when a node transitions to/from memoryless state. - * We can detect which transition occurred by examining whether the - * node has memory now. hugetlb_register_node() already check this - * so we try to register the attributes. If that fails, then the - * node has transitioned to memoryless, try to unregister the - * attributes. - */ - if (!hugetlb_register_node(node)) - hugetlb_unregister_node(node); -} + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { + struct memory_block *mem; -static void init_node_hugetlb_work(int nid) -{ - INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); -} + mem = find_memory_block_by_id(block_id); + if (!mem) + continue; -static int node_memory_callback(struct notifier_block *self, - unsigned long action, void *arg) -{ - struct memory_notify *mnb = arg; - int nid = mnb->status_change_nid; - - switch (action) { - case MEM_ONLINE: - case MEM_OFFLINE: - /* - * offload per node hstate [un]registration to a work thread - * when transitioning to/from memoryless state. - */ - if (nid != NUMA_NO_NODE) - schedule_work(&node_devices[nid]->node_work); - break; + memory_block_add_nid_early(mem, nid); + do_register_memory_block_under_node(nid, mem); + put_device(&mem->dev); + } - case MEM_GOING_ONLINE: - case MEM_GOING_OFFLINE: - case MEM_CANCEL_ONLINE: - case MEM_CANCEL_OFFLINE: - default: - break; } - - return NOTIFY_OK; } -#endif /* CONFIG_HUGETLBFS */ -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ -#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ - !defined(CONFIG_HUGETLBFS) -static inline int node_memory_callback(struct notifier_block *self, - unsigned long action, void *arg) +void register_memory_blocks_under_node_hotplug(int nid, unsigned long start_pfn, + unsigned long end_pfn) { - return NOTIFY_OK; + walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), + (void *)&nid, register_mem_block_under_node_hotplug); + return; } +#endif /* CONFIG_MEMORY_HOTPLUG */ -static void init_node_hugetlb_work(int nid) { } - -#endif - -int __register_one_node(int nid) +/** + * register_node - Initialize and register the node device. + * @nid: Node number to use when creating the device. 
+ * + * Return: 0 on success, -errno otherwise + */ +int register_node(int nid) { int error; int cpu; + struct node *node; - node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); - if (!node_devices[nid]) + node = kzalloc(sizeof(struct node), GFP_KERNEL); + if (!node) return -ENOMEM; - error = register_node(node_devices[nid], nid); + INIT_LIST_HEAD(&node->access_list); + + node->dev.id = nid; + node->dev.bus = &node_subsys; + node->dev.release = node_device_release; + node->dev.groups = node_dev_groups; + + error = device_register(&node->dev); + if (error) { + put_device(&node->dev); + return error; + } + + node_devices[nid] = node; + hugetlb_register_node(node); + compaction_register_node(node); + reclaim_register_node(node); /* link cpu under this node */ for_each_present_cpu(cpu) { @@ -974,20 +903,30 @@ int __register_one_node(int nid) register_cpu_under_node(cpu, nid); } - INIT_LIST_HEAD(&node_devices[nid]->access_list); - /* initialize work queue for memory hot plug */ - init_node_hugetlb_work(nid); node_init_caches(nid); return error; } - -void unregister_one_node(int nid) +/** + * unregister_node - unregister a node device + * @nid: nid of the node going away + * + * Unregisters the node device at node id @nid. All the devices on the + * node must be unregistered before calling this function. + */ +void unregister_node(int nid) { - if (!node_devices[nid]) + struct node *node = node_devices[nid]; + + if (!node) return; - unregister_node(node_devices[nid]); + hugetlb_unregister_node(node); + compaction_unregister_node(node); + reclaim_unregister_node(node); + node_remove_accesses(node); + node_remove_caches(node); + device_unregister(&node->dev); node_devices[nid] = NULL; } @@ -1047,27 +986,26 @@ static const struct attribute_group *cpu_root_attr_groups[] = { NULL, }; -#define NODE_CALLBACK_PRI 2 /* lower than SLAB */ -static int __init register_node_type(void) +void __init node_dev_init(void) { - int ret; + int ret, i; BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); - if (!ret) { - static struct notifier_block node_memory_callback_nb = { - .notifier_call = node_memory_callback, - .priority = NODE_CALLBACK_PRI, - }; - register_hotmemory_notifier(&node_memory_callback_nb); - } + if (ret) + panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Note: we're not going to unregister the node class if we fail - * to register the node state class attribute files. + * Create all node devices, which will properly link the node + * to already created cpu devices. 
*/ - return ret; + for_each_online_node(i) { + ret = register_node(i); + if (ret) + panic("%s() failed to add node: %d\n", __func__, ret); + } + + register_memory_blocks_under_nodes(); } -postcore_initcall(register_node_type); diff --git a/drivers/base/physical_location.c b/drivers/base/physical_location.c new file mode 100644 index 000000000000..a5539e294d4d --- /dev/null +++ b/drivers/base/physical_location.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device physical location support + * + * Author: Won Chung <wonchung@google.com> + */ + +#include <linux/acpi.h> +#include <linux/sysfs.h> +#include <linux/string_choices.h> + +#include "physical_location.h" + +bool dev_add_physical_location(struct device *dev) +{ + struct acpi_pld_info *pld; + + if (!has_acpi_companion(dev)) + return false; + + if (!acpi_get_physical_device_location(ACPI_HANDLE(dev), &pld)) + return false; + + dev->physical_location = + kzalloc(sizeof(*dev->physical_location), GFP_KERNEL); + if (!dev->physical_location) { + ACPI_FREE(pld); + return false; + } + + dev->physical_location->panel = pld->panel; + dev->physical_location->vertical_position = pld->vertical_position; + dev->physical_location->horizontal_position = pld->horizontal_position; + dev->physical_location->dock = pld->dock; + dev->physical_location->lid = pld->lid; + + ACPI_FREE(pld); + return true; +} + +static ssize_t panel_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *panel; + + switch (dev->physical_location->panel) { + case DEVICE_PANEL_TOP: + panel = "top"; + break; + case DEVICE_PANEL_BOTTOM: + panel = "bottom"; + break; + case DEVICE_PANEL_LEFT: + panel = "left"; + break; + case DEVICE_PANEL_RIGHT: + panel = "right"; + break; + case DEVICE_PANEL_FRONT: + panel = "front"; + break; + case DEVICE_PANEL_BACK: + panel = "back"; + break; + default: + panel = "unknown"; + } + return sysfs_emit(buf, "%s\n", panel); +} +static DEVICE_ATTR_RO(panel); + +static ssize_t vertical_position_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *vertical_position; + + switch (dev->physical_location->vertical_position) { + case DEVICE_VERT_POS_UPPER: + vertical_position = "upper"; + break; + case DEVICE_VERT_POS_CENTER: + vertical_position = "center"; + break; + case DEVICE_VERT_POS_LOWER: + vertical_position = "lower"; + break; + default: + vertical_position = "unknown"; + } + return sysfs_emit(buf, "%s\n", vertical_position); +} +static DEVICE_ATTR_RO(vertical_position); + +static ssize_t horizontal_position_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *horizontal_position; + + switch (dev->physical_location->horizontal_position) { + case DEVICE_HORI_POS_LEFT: + horizontal_position = "left"; + break; + case DEVICE_HORI_POS_CENTER: + horizontal_position = "center"; + break; + case DEVICE_HORI_POS_RIGHT: + horizontal_position = "right"; + break; + default: + horizontal_position = "unknown"; + } + return sysfs_emit(buf, "%s\n", horizontal_position); +} +static DEVICE_ATTR_RO(horizontal_position); + +static ssize_t dock_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", + str_yes_no(dev->physical_location->dock)); +} +static DEVICE_ATTR_RO(dock); + +static ssize_t lid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", + str_yes_no(dev->physical_location->lid)); +} +static DEVICE_ATTR_RO(lid); + +static struct attribute 
*dev_attr_physical_location[] = { + &dev_attr_panel.attr, + &dev_attr_vertical_position.attr, + &dev_attr_horizontal_position.attr, + &dev_attr_dock.attr, + &dev_attr_lid.attr, + NULL, +}; + +const struct attribute_group dev_attr_physical_location_group = { + .name = "physical_location", + .attrs = dev_attr_physical_location, +}; + diff --git a/drivers/base/physical_location.h b/drivers/base/physical_location.h new file mode 100644 index 000000000000..3f3f61307998 --- /dev/null +++ b/drivers/base/physical_location.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Device physical location support + * + * Author: Won Chung <wonchung@google.com> + */ + +#include <linux/device.h> + +#ifdef CONFIG_ACPI +bool dev_add_physical_location(struct device *dev); +extern const struct attribute_group dev_attr_physical_location_group; +#else +static inline bool dev_add_physical_location(struct device *dev) { return false; }; +static const struct attribute_group dev_attr_physical_location_group = {}; +#endif diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 0b72b134a304..70db08f3ac6f 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -4,417 +4,97 @@ * * Copyright (C) 2015 ARM Limited, All Rights Reserved. * Author: Marc Zyngier <marc.zyngier@arm.com> + * Copyright (C) 2022 Linutronix GmbH */ #include <linux/device.h> -#include <linux/idr.h> -#include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/msi.h> -#include <linux/slab.h> - -#define DEV_ID_SHIFT 21 -#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT)) /* - * Internal data structure containing a (made up, but unique) devid - * and the callback to write the MSI message. + * This indirection can go when platform_device_msi_init_and_alloc_irqs() + * is switched to a proper irq_chip::irq_write_msi_msg() callback. Keep it + * simple for now. */ -struct platform_msi_priv_data { - struct device *dev; - void *host_data; - msi_alloc_info_t arg; - irq_write_msi_msg_t write_msg; - int devid; -}; - -/* The devid allocator */ -static DEFINE_IDA(platform_msi_devid_ida); - -#ifdef GENERIC_MSI_DOMAIN_OPS -/* - * Convert an msi_desc to a globaly unique identifier (per-device - * devid + msi_desc position in the msi_list). 
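Stepping back to physical_location.c/.h above: the new "physical_location" group surfaces the ACPI _PLD data as plain sysfs strings. A minimal userspace sketch, assuming a hypothetical device path; only the group name and the attribute names (panel, vertical_position, horizontal_position, dock, lid) come from the code above:

#include <stdio.h>

/* The sysfs path is hypothetical; attribute names match the group above. */
static void print_location(const char *attr)
{
	char path[256], value[32];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/usb/devices/1-2/physical_location/%s", attr);
	f = fopen(path, "r");
	if (!f)
		return;
	if (fgets(value, sizeof(value), f))
		printf("%s: %s", attr, value);	/* value already ends in '\n' */
	fclose(f);
}

int main(void)
{
	const char *attrs[] = { "panel", "vertical_position",
				"horizontal_position", "dock", "lid" };
	for (unsigned int i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		print_location(attrs[i]);
	return 0;
}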
- */ -static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc) +static void platform_msi_write_msi_msg(struct irq_data *d, struct msi_msg *msg) { - u32 devid; - - devid = desc->platform.msi_priv_data->devid; + irq_write_msi_msg_t cb = d->chip_data; - return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index; + cb(irq_data_get_msi_desc(d), msg); } static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { arg->desc = desc; - arg->hwirq = platform_msi_calc_hwirq(desc); -} - -static int platform_msi_init(struct irq_domain *domain, - struct msi_domain_info *info, - unsigned int virq, irq_hw_number_t hwirq, - msi_alloc_info_t *arg) -{ - return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - info->chip, info->chip_data); -} - -static void platform_msi_set_proxy_dev(msi_alloc_info_t *arg) -{ - arg->flags |= MSI_ALLOC_FLAGS_PROXY_DEVICE; -} -#else -#define platform_msi_set_desc NULL -#define platform_msi_init NULL -#define platform_msi_set_proxy_dev(x) do {} while(0) -#endif - -static void platform_msi_update_dom_ops(struct msi_domain_info *info) -{ - struct msi_domain_ops *ops = info->ops; - - BUG_ON(!ops); - - if (ops->msi_init == NULL) - ops->msi_init = platform_msi_init; - if (ops->set_desc == NULL) - ops->set_desc = platform_msi_set_desc; -} - -static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg) -{ - struct msi_desc *desc = irq_data_get_msi_desc(data); - struct platform_msi_priv_data *priv_data; - - priv_data = desc->platform.msi_priv_data; - - priv_data->write_msg(desc, msg); -} - -static void platform_msi_update_chip_ops(struct msi_domain_info *info) -{ - struct irq_chip *chip = info->chip; - - BUG_ON(!chip); - if (!chip->irq_mask) - chip->irq_mask = irq_chip_mask_parent; - if (!chip->irq_unmask) - chip->irq_unmask = irq_chip_unmask_parent; - if (!chip->irq_eoi) - chip->irq_eoi = irq_chip_eoi_parent; - if (!chip->irq_set_affinity) - chip->irq_set_affinity = msi_domain_set_affinity; - if (!chip->irq_write_msi_msg) - chip->irq_write_msi_msg = platform_msi_write_msg; - if (WARN_ON((info->flags & MSI_FLAG_LEVEL_CAPABLE) && - !(chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI))) - info->flags &= ~MSI_FLAG_LEVEL_CAPABLE; -} - -static void platform_msi_free_descs(struct device *dev, int base, int nvec) -{ - struct msi_desc *desc, *tmp; - - list_for_each_entry_safe(desc, tmp, dev_to_msi_list(dev), list) { - if (desc->platform.msi_index >= base && - desc->platform.msi_index < (base + nvec)) { - list_del(&desc->list); - free_msi_entry(desc); - } - } -} - -static int platform_msi_alloc_descs_with_irq(struct device *dev, int virq, - int nvec, - struct platform_msi_priv_data *data) - -{ - struct msi_desc *desc; - int i, base = 0; - - if (!list_empty(dev_to_msi_list(dev))) { - desc = list_last_entry(dev_to_msi_list(dev), - struct msi_desc, list); - base = desc->platform.msi_index + 1; - } - - for (i = 0; i < nvec; i++) { - desc = alloc_msi_entry(dev, 1, NULL); - if (!desc) - break; - - desc->platform.msi_priv_data = data; - desc->platform.msi_index = base + i; - desc->irq = virq ? 
virq + i : 0; - - list_add_tail(&desc->list, dev_to_msi_list(dev)); - } - - if (i != nvec) { - /* Clean up the mess */ - platform_msi_free_descs(dev, base, nvec); - - return -ENOMEM; - } - - return 0; -} - -static int platform_msi_alloc_descs(struct device *dev, int nvec, - struct platform_msi_priv_data *data) - -{ - return platform_msi_alloc_descs_with_irq(dev, 0, nvec, data); -} - -/** - * platform_msi_create_irq_domain - Create a platform MSI interrupt domain - * @fwnode: Optional fwnode of the interrupt controller - * @info: MSI domain info - * @parent: Parent irq domain - * - * Updates the domain and chip ops and creates a platform MSI - * interrupt domain. - * - * Returns: - * A domain pointer or NULL in case of failure. - */ -struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, - struct msi_domain_info *info, - struct irq_domain *parent) -{ - struct irq_domain *domain; - - if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) - platform_msi_update_dom_ops(info); - if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) - platform_msi_update_chip_ops(info); - - domain = msi_create_irq_domain(fwnode, info, parent); - if (domain) - irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI); - - return domain; + arg->hwirq = desc->msi_index; } -static struct platform_msi_priv_data * -platform_msi_alloc_priv_data(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg) -{ - struct platform_msi_priv_data *datap; - /* - * Limit the number of interrupts to 2048 per device. Should we - * need to bump this up, DEV_ID_SHIFT should be adjusted - * accordingly (which would impact the max number of MSI - * capable devices). - */ - if (!dev->msi_domain || !write_msi_msg || !nvec || nvec > MAX_DEV_MSIS) - return ERR_PTR(-EINVAL); - - if (dev->msi_domain->bus_token != DOMAIN_BUS_PLATFORM_MSI) { - dev_err(dev, "Incompatible msi_domain, giving up\n"); - return ERR_PTR(-EINVAL); - } - - /* Already had a helping of MSI? Greed... 
*/ - if (!list_empty(dev_to_msi_list(dev))) - return ERR_PTR(-EBUSY); - - datap = kzalloc(sizeof(*datap), GFP_KERNEL); - if (!datap) - return ERR_PTR(-ENOMEM); +static const struct msi_domain_template platform_msi_template = { + .chip = { + .name = "pMSI", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_write_msi_msg = platform_msi_write_msi_msg, + /* The rest is filled in by the platform MSI parent */ + }, - datap->devid = ida_simple_get(&platform_msi_devid_ida, - 0, 1 << DEV_ID_SHIFT, GFP_KERNEL); - if (datap->devid < 0) { - int err = datap->devid; - kfree(datap); - return ERR_PTR(err); - } + .ops = { + .set_desc = platform_msi_set_desc, + }, - datap->write_msg = write_msi_msg; - datap->dev = dev; - - return datap; -} - -static void platform_msi_free_priv_data(struct platform_msi_priv_data *data) -{ - ida_simple_remove(&platform_msi_devid_ida, data->devid); - kfree(data); -} + .info = { + .bus_token = DOMAIN_BUS_DEVICE_MSI, + }, +}; /** - * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev + * platform_device_msi_init_and_alloc_irqs - Initialize platform device MSI + * and allocate interrupts for @dev * @dev: The device for which to allocate interrupts * @nvec: The number of interrupts to allocate * @write_msi_msg: Callback to write an interrupt message for @dev * * Returns: * Zero for success, or an error code in case of failure - */ -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg) -{ - struct platform_msi_priv_data *priv_data; - int err; - - priv_data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg); - if (IS_ERR(priv_data)) - return PTR_ERR(priv_data); - - err = platform_msi_alloc_descs(dev, nvec, priv_data); - if (err) - goto out_free_priv_data; - - err = msi_domain_alloc_irqs(dev->msi_domain, dev, nvec); - if (err) - goto out_free_desc; - - return 0; - -out_free_desc: - platform_msi_free_descs(dev, 0, nvec); -out_free_priv_data: - platform_msi_free_priv_data(priv_data); - - return err; -} -EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs); - -/** - * platform_msi_domain_free_irqs - Free MSI interrupts for @dev - * @dev: The device for which to free interrupts - */ -void platform_msi_domain_free_irqs(struct device *dev) -{ - if (!list_empty(dev_to_msi_list(dev))) { - struct msi_desc *desc; - - desc = first_msi_entry(dev); - platform_msi_free_priv_data(desc->platform.msi_priv_data); - } - - msi_domain_free_irqs(dev->msi_domain, dev); - platform_msi_free_descs(dev, 0, MAX_DEV_MSIS); -} -EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs); - -/** - * platform_msi_get_host_data - Query the private data associated with - * a platform-msi domain - * @domain: The platform-msi domain - * - * Returns the private data provided when calling - * platform_msi_create_device_domain. - */ -void *platform_msi_get_host_data(struct irq_domain *domain) -{ - struct platform_msi_priv_data *data = domain->host_data; - return data->host_data; -} - -/** - * __platform_msi_create_device_domain - Create a platform-msi domain * - * @dev: The device generating the MSIs - * @nvec: The number of MSIs that need to be allocated - * @is_tree: flag to indicate tree hierarchy - * @write_msi_msg: Callback to write an interrupt message for @dev - * @ops: The hierarchy domain operations to use - * @host_data: Private data associated to this domain + * This creates a MSI domain on @dev which has @dev->msi.domain as + * parent. The parent domain sets up the new domain. 
The domain has + * a fixed size of @nvec. The domain is managed by devres and will + * be removed when the device is removed. * - * Returns an irqdomain for @nvec interrupts + * Note: For migration purposes this falls back to the original platform_msi code + * up to the point where all platforms have been converted to the MSI + * parent model. */ -struct irq_domain * -__platform_msi_create_device_domain(struct device *dev, - unsigned int nvec, - bool is_tree, - irq_write_msi_msg_t write_msi_msg, - const struct irq_domain_ops *ops, - void *host_data) +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg) { - struct platform_msi_priv_data *data; - struct irq_domain *domain; - int err; - - data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg); - if (IS_ERR(data)) - return NULL; - - data->host_data = host_data; - domain = irq_domain_create_hierarchy(dev->msi_domain, 0, - is_tree ? 0 : nvec, - dev->fwnode, ops, data); - if (!domain) - goto free_priv; - - platform_msi_set_proxy_dev(&data->arg); - err = msi_domain_prepare_irqs(domain->parent, dev, nvec, &data->arg); - if (err) - goto free_domain; + struct irq_domain *domain = dev->msi.domain; - return domain; + if (!domain || !write_msi_msg) + return -EINVAL; -free_domain: - irq_domain_remove(domain); -free_priv: - platform_msi_free_priv_data(data); - return NULL; -} - -/** - * platform_msi_domain_free - Free interrupts associated with a platform-msi - * domain - * - * @domain: The platform-msi domain - * @virq: The base irq from which to perform the free operation - * @nvec: How many interrupts to free from @virq - */ -void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nvec) -{ - struct platform_msi_priv_data *data = domain->host_data; - struct msi_desc *desc, *tmp; - for_each_msi_entry_safe(desc, tmp, data->dev) { - if (WARN_ON(!desc->irq || desc->nvec_used != 1)) - return; - if (!(desc->irq >= virq && desc->irq < (virq + nvec))) - continue; + /* + * @write_msi_msg is stored in the resulting msi_domain_info::data. + * The underlying domain creation mechanism will assign that + * callback to the resulting irq chip. + */ + if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, + &platform_msi_template, + nvec, NULL, write_msi_msg)) + return -ENODEV; - irq_domain_free_irqs_common(domain, desc->irq, 1); - list_del(&desc->list); - free_msi_entry(desc); - } + return msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, nvec - 1); } +EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs); /** - * platform_msi_domain_alloc - Allocate interrupts associated with - * a platform-msi domain - * - * @domain: The platform-msi domain - * @virq: The base irq from which to perform the allocate operation - * @nr_irqs: How many interrupts to free from @virq - * - * Return 0 on success, or an error code on failure. Must be called - * with irq_domain_mutex held (which can only be done as part of a - * top-level interrupt allocation). 
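A consumer-side sketch of the simplified API above and its platform_device_msi_free_irqs_all() counterpart below; everything except the two platform_device_msi_*() calls, msi_desc_to_dev() and the msi_desc/msi_msg fields is invented for illustration:

#include <linux/device.h>
#include <linux/msi.h>
#include <linux/platform_device.h>

/* Called by the MSI core for every allocated vector; a real driver would
 * program address/data into its doorbell registers instead of logging. */
static void foo_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	dev_dbg(msi_desc_to_dev(desc), "MSI %u: addr %#x%08x data %#x\n",
		desc->msi_index, msg->address_hi, msg->address_lo, msg->data);
}

static int foo_probe(struct platform_device *pdev)
{
	/* Creates the per-device MSI domain and allocates 8 vectors (0..7). */
	return platform_device_msi_init_and_alloc_irqs(&pdev->dev, 8,
						       foo_write_msi_msg);
}

static void foo_remove(struct platform_device *pdev)
{
	/* Frees the vectors and tears the per-device MSI domain down again. */
	platform_device_msi_free_irqs_all(&pdev->dev);
}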
+ * platform_device_msi_free_irqs_all - Free all interrupts for @dev + * @dev: The device for which to free interrupts */ -int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) +void platform_device_msi_free_irqs_all(struct device *dev) { - struct platform_msi_priv_data *data = domain->host_data; - int err; - - err = platform_msi_alloc_descs_with_irq(data->dev, virq, nr_irqs, data); - if (err) - return err; - - err = msi_domain_populate_irqs(domain->parent, data->dev, - virq, nr_irqs, &data->arg); - if (err) - platform_msi_domain_free(domain, virq, nr_irqs); - - return err; + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); + msi_remove_device_irq_domain(dev, MSI_DEFAULT_DOMAIN); } +EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 8640578f45e9..b45d41b018ca 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -30,6 +30,8 @@ #include <linux/property.h> #include <linux/kmemleak.h> #include <linux/types.h> +#include <linux/iommu.h> +#include <linux/dma-map-ops.h> #include "base.h" #include "power/power.h" @@ -148,25 +150,37 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); #endif /* CONFIG_HAS_IOMEM */ +static const struct cpumask *get_irq_affinity(struct platform_device *dev, + unsigned int num) +{ + const struct cpumask *mask = NULL; +#ifndef CONFIG_SPARC + struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); + + if (is_of_node(fwnode)) + mask = of_irq_get_affinity(to_of_node(fwnode), num); + else if (is_acpi_device_node(fwnode)) + mask = acpi_irq_get_affinity(ACPI_HANDLE_FWNODE(fwnode), num); +#endif + + return mask ?: cpu_possible_mask; +} + /** - * platform_get_irq_optional - get an optional IRQ for a device - * @dev: platform device - * @num: IRQ number index + * platform_get_irq_affinity - get an optional IRQ and its affinity for a device + * @dev: platform device + * @num: interrupt number index + * @affinity: optional cpumask pointer to get the affinity of a per-cpu interrupt * - * Gets an IRQ for a platform device. Device drivers should check the return - * value for errors so as to not pass a negative integer value to the - * request_irq() APIs. This is the same as platform_get_irq(), except that it - * does not print an error message if an IRQ can not be obtained. - * - * For example:: + * Gets an interupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. Optional affinity information is provided in the + * affinity pointer if available, and NULL otherwise. * - * int irq = platform_get_irq_optional(pdev, 0); - * if (irq < 0) - * return irq; - * - * Return: non-zero IRQ number on success, negative error number on failure. + * Return: non-zero interrupt number on success, negative error number on failure. 
*/ -int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +int platform_get_irq_affinity(struct platform_device *dev, unsigned int num, + const struct cpumask **affinity) { int ret; #ifdef CONFIG_SPARC @@ -176,18 +190,19 @@ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) ret = dev->archdata.irqs[num]; goto out; #else + struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); struct resource *r; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { - ret = of_irq_get(dev->dev.of_node, num); + if (is_of_node(fwnode)) { + ret = of_irq_get(to_of_node(fwnode), num); if (ret > 0 || ret == -EPROBE_DEFER) goto out; } r = platform_get_resource(dev, IORESOURCE_IRQ, num); - if (has_acpi_companion(&dev->dev)) { + if (is_acpi_device_node(fwnode)) { if (r && r->flags & IORESOURCE_DISABLED) { - ret = acpi_irq_get(ACPI_HANDLE(&dev->dev), num, r); + ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), num, r); if (ret) goto out; } @@ -220,8 +235,8 @@ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) * the device will only expose one IRQ, and this fallback * allows a common code path across either kind of resource. */ - if (num == 0 && has_acpi_companion(&dev->dev)) { - ret = acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num); + if (num == 0 && is_acpi_device_node(fwnode)) { + ret = acpi_dev_gpio_irq_get(to_acpi_device_node(fwnode), num); /* Our callers expect -ENXIO for missing IRQs. */ if (ret >= 0 || ret == -EPROBE_DEFER) goto out; @@ -231,9 +246,39 @@ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) out_not_found: ret = -ENXIO; out: - WARN(ret == 0, "0 is an invalid IRQ number\n"); + if (WARN(!ret, "0 is an invalid IRQ number\n")) + return -EINVAL; + + if (ret > 0 && affinity) + *affinity = get_irq_affinity(dev, num); + return ret; } +EXPORT_SYMBOL_GPL(platform_get_irq_affinity); + +/** + * platform_get_irq_optional - get an optional interrupt for a device + * @dev: platform device + * @num: interrupt number index + * + * Gets an interrupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. This is the same as platform_get_irq(), except + * that it does not print an error message if an interrupt can not be + * obtained. + * + * For example:: + * + * int irq = platform_get_irq_optional(pdev, 0); + * if (irq < 0) + * return irq; + * + * Return: non-zero interrupt number on success, negative error number on failure. 
+ */ +int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +{ + return platform_get_irq_affinity(dev, num, NULL); +} EXPORT_SYMBOL_GPL(platform_get_irq_optional); /** @@ -258,8 +303,9 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) int ret; ret = platform_get_irq_optional(dev, num); - if (ret < 0 && ret != -EPROBE_DEFER) - dev_err(&dev->dev, "IRQ index %u not found\n", num); + if (ret < 0) + return dev_err_probe(&dev->dev, ret, + "IRQ index %u not found\n", num); return ret; } @@ -287,7 +333,7 @@ EXPORT_SYMBOL_GPL(platform_irq_count); struct irq_affinity_devres { unsigned int count; - unsigned int irq[]; + unsigned int irq[] __counted_by(count); }; static void platform_disable_acpi_irq(struct platform_device *pdev, int index) @@ -308,7 +354,7 @@ static void devm_platform_get_irqs_affinity_release(struct device *dev, for (i = 0; i < ptr->count; i++) { irq_dispose_mapping(ptr->irq[i]); - if (has_acpi_companion(dev)) + if (is_acpi_device_node(dev_fwnode(dev))) platform_disable_acpi_irq(to_platform_device(dev), i); } } @@ -437,15 +483,14 @@ static int __platform_get_irq_byname(struct platform_device *dev, struct resource *r; int ret; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { - ret = of_irq_get_byname(dev->dev.of_node, name); - if (ret > 0 || ret == -EPROBE_DEFER) - return ret; - } + ret = fwnode_irq_get_byname(dev_fwnode(&dev->dev), name); + if (ret > 0 || ret == -EPROBE_DEFER) + return ret; r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name); if (r) { - WARN(r->start == 0, "0 is an invalid IRQ number\n"); + if (WARN(!r->start, "0 is an invalid IRQ number\n")) + return -EINVAL; return r->start; } @@ -466,9 +511,9 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name) int ret; ret = __platform_get_irq_byname(dev, name); - if (ret < 0 && ret != -EPROBE_DEFER) - dev_err(&dev->dev, "IRQ %s not found\n", name); - + if (ret < 0) + return dev_err_probe(&dev->dev, ret, "IRQ %s not found\n", + name); return ret; } EXPORT_SYMBOL_GPL(platform_get_irq_byname); @@ -494,6 +539,8 @@ EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional); * platform_add_devices - add a numbers of platform devices * @devs: array of platform devices to add * @num: number of platform devices in array + * + * Return: 0 on success, negative error number on failure. */ int platform_add_devices(struct platform_device **devs, int num) { @@ -602,7 +649,7 @@ int platform_device_add_resources(struct platform_device *pdev, struct resource *r = NULL; if (res) { - r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL); + r = kmemdup_array(res, num, sizeof(*r), GFP_KERNEL); if (!r) return -ENOMEM; } @@ -642,22 +689,6 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, EXPORT_SYMBOL_GPL(platform_device_add_data); /** - * platform_device_add_properties - add built-in properties to a platform device - * @pdev: platform device to add properties to - * @properties: null terminated array of properties to add - * - * The function will take deep copy of @properties and attach the copy to the - * platform device. The memory associated with properties will be freed when the - * platform device is released. 
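A short sketch of the new platform_get_irq_affinity() helper from the hunk above; the handler and device names are made up, and only the function signature and the cpu_possible_mask fallback come from the code above:

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>

static int foo_get_irq(struct platform_device *pdev)
{
	const struct cpumask *affinity;
	int irq;

	irq = platform_get_irq_affinity(pdev, 0, &affinity);
	if (irq < 0)
		return irq;

	/* When the firmware node carries no affinity information the helper
	 * falls back to cpu_possible_mask, so the pointer is always valid. */
	dev_info(&pdev->dev, "IRQ %d, affinity %*pbl\n",
		 irq, cpumask_pr_args(affinity));
	return irq;
}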
- */ -int platform_device_add_properties(struct platform_device *pdev, - const struct property_entry *properties) -{ - return device_add_properties(&pdev->dev, properties); -} -EXPORT_SYMBOL_GPL(platform_device_add_properties); - -/** * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding * @@ -666,23 +697,21 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { + struct device *dev = &pdev->dev; u32 i; int ret; - if (!pdev) - return -EINVAL; + if (!dev->parent) + dev->parent = &platform_bus; - if (!pdev->dev.parent) - pdev->dev.parent = &platform_bus; - - pdev->dev.bus = &platform_bus_type; + dev->bus = &platform_bus_type; switch (pdev->id) { default: - dev_set_name(&pdev->dev, "%s.%d", pdev->name, pdev->id); + dev_set_name(dev, "%s.%d", pdev->name, pdev->id); break; case PLATFORM_DEVID_NONE: - dev_set_name(&pdev->dev, "%s", pdev->name); + dev_set_name(dev, "%s", pdev->name); break; case PLATFORM_DEVID_AUTO: /* @@ -692,10 +721,10 @@ int platform_device_add(struct platform_device *pdev) */ ret = ida_alloc(&platform_devid_ida, GFP_KERNEL); if (ret < 0) - goto err_out; + return ret; pdev->id = ret; pdev->id_auto = true; - dev_set_name(&pdev->dev, "%s.%d.auto", pdev->name, pdev->id); + dev_set_name(dev, "%s.%d.auto", pdev->name, pdev->id); break; } @@ -703,7 +732,7 @@ int platform_device_add(struct platform_device *pdev) struct resource *p, *r = &pdev->resource[i]; if (r->name == NULL) - r->name = dev_name(&pdev->dev); + r->name = dev_name(dev); p = r->parent; if (!p) { @@ -716,18 +745,20 @@ int platform_device_add(struct platform_device *pdev) if (p) { ret = insert_resource(p, r); if (ret) { - dev_err(&pdev->dev, "failed to claim resource %d: %pR\n", i, r); + dev_err(dev, "failed to claim resource %d: %pR\n", i, r); goto failed; } } } - pr_debug("Registering platform device '%s'. Parent at %s\n", - dev_name(&pdev->dev), dev_name(pdev->dev.parent)); + pr_debug("Registering platform device '%s'. Parent at %s\n", dev_name(dev), + dev_name(dev->parent)); - ret = device_add(&pdev->dev); - if (ret == 0) - return ret; + ret = device_add(dev); + if (ret) + goto failed; + + return 0; failed: if (pdev->id_auto) { @@ -741,7 +772,6 @@ int platform_device_add(struct platform_device *pdev) release_resource(r); } - err_out: return ret; } EXPORT_SYMBOL_GPL(platform_device_add); @@ -778,6 +808,10 @@ EXPORT_SYMBOL_GPL(platform_device_del); /** * platform_device_register - add a platform-level device * @pdev: platform device we're adding + * + * NOTE: _Never_ directly free @pdev after calling this function, even if it + * returned an error! Always use platform_device_put() to give up the + * reference initialised in this function instead. 
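The ownership rule spelled out in the new platform_device_register() comment above also governs the platform_device_alloc()/platform_device_add() pair; a minimal sketch with an invented device name:

#include <linux/err.h>
#include <linux/platform_device.h>

static struct platform_device *foo_create_child(void)
{
	struct platform_device *pdev;
	int ret;

	pdev = platform_device_alloc("foo-child", PLATFORM_DEVID_NONE);
	if (!pdev)
		return ERR_PTR(-ENOMEM);

	ret = platform_device_add(pdev);
	if (ret) {
		/* Never kfree() the device here: drop the reference taken
		 * at allocation and let the release callback free it. */
		platform_device_put(pdev);
		return ERR_PTR(ret);
	}
	return pdev;
}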
*/ int platform_device_register(struct platform_device *pdev) { @@ -842,8 +876,8 @@ struct platform_device *platform_device_register_full( goto err; if (pdevinfo->properties) { - ret = platform_device_add_properties(pdev, - pdevinfo->properties); + ret = device_create_managed_software_node(&pdev->dev, + pdevinfo->properties, NULL); if (ret) goto err; } @@ -890,6 +924,13 @@ static int platform_probe_fail(struct platform_device *pdev) return -ENXIO; } +static int is_bound_to_driver(struct device *dev, void *driver) +{ + if (dev->driver == driver) + return 1; + return 0; +} + /** * __platform_driver_probe - register driver for non-hotpluggable device * @drv: platform driver structure @@ -913,7 +954,7 @@ static int platform_probe_fail(struct platform_device *pdev) int __init_or_module __platform_driver_probe(struct platform_driver *drv, int (*probe)(struct platform_device *), struct module *module) { - int retval, code; + int retval; if (drv->driver.probe_type == PROBE_PREFER_ASYNCHRONOUS) { pr_err("%s: drivers registered with %s can not be probed asynchronously\n", @@ -939,24 +980,21 @@ int __init_or_module __platform_driver_probe(struct platform_driver *drv, /* temporary section violation during probe() */ drv->probe = probe; - retval = code = __platform_driver_register(drv, module); + retval = __platform_driver_register(drv, module); if (retval) return retval; - /* - * Fixup that section violation, being paranoid about code scanning - * the list of drivers in order to probe new devices. Check to see - * if the probe was successful, and make sure any forced probes of - * new devices fail. - */ - spin_lock(&drv->driver.bus->p->klist_drivers.k_lock); + /* Force all new probes of this driver to fail */ drv->probe = platform_probe_fail; - if (code == 0 && list_empty(&drv->driver.p->klist_devices.k_list)) - retval = -ENODEV; - spin_unlock(&drv->driver.bus->p->klist_drivers.k_lock); - if (code != retval) + /* Walk all platform devices and see if any actually bound to this driver. + * If not, return an error as the device should have done so by now. 
+ */ + if (!bus_for_each_dev(&platform_bus_type, NULL, &drv->driver, is_bound_to_driver)) { + retval = -ENODEV; platform_driver_unregister(drv); + } + return retval; } EXPORT_SYMBOL_GPL(__platform_driver_probe); @@ -985,7 +1023,7 @@ struct platform_device * __init_or_module __platform_create_bundle( struct platform_device *pdev; int error; - pdev = platform_device_alloc(driver->driver.name, -1); + pdev = platform_device_alloc(driver->driver.name, PLATFORM_DEVID_NONE); if (!pdev) { error = -ENOMEM; goto err_out; @@ -1125,7 +1163,7 @@ static int platform_legacy_resume(struct device *dev) int platform_pm_suspend(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1143,7 +1181,7 @@ int platform_pm_suspend(struct device *dev) int platform_pm_resume(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1165,7 +1203,7 @@ int platform_pm_resume(struct device *dev) int platform_pm_freeze(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1183,7 +1221,7 @@ int platform_pm_freeze(struct device *dev) int platform_pm_thaw(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1201,7 +1239,7 @@ int platform_pm_thaw(struct device *dev) int platform_pm_poweroff(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1219,7 +1257,7 @@ int platform_pm_poweroff(struct device *dev) int platform_pm_restore(struct device *dev) { - struct device_driver *drv = dev->driver; + const struct device_driver *drv = dev->driver; int ret = 0; if (!drv) @@ -1286,31 +1324,11 @@ static ssize_t driver_override_store(struct device *dev, const char *buf, size_t count) { struct platform_device *pdev = to_platform_device(dev); - char *driver_override, *old, *cp; - - /* We need to keep extra room for a newline */ - if (count >= (PAGE_SIZE - 1)) - return -EINVAL; - - driver_override = kstrndup(buf, count, GFP_KERNEL); - if (!driver_override) - return -ENOMEM; - - cp = strchr(driver_override, '\n'); - if (cp) - *cp = '\0'; - - device_lock(dev); - old = pdev->driver_override; - if (strlen(driver_override)) { - pdev->driver_override = driver_override; - } else { - kfree(driver_override); - pdev->driver_override = NULL; - } - device_unlock(dev); + int ret; - kfree(old); + ret = driver_set_override(dev, &pdev->driver_override, buf, count); + if (ret) + return ret; return count; } @@ -1355,7 +1373,7 @@ __ATTRIBUTE_GROUPS(platform_dev); * and compare it against the name of the driver. Return whether they match * or not. 
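__platform_driver_probe() now checks for a bound device by walking the platform bus, as shown above. A registration sketch under the usual constraints (names invented); platform_driver_probe() is assumed here as the usual THIS_MODULE convenience wrapper around it:

#include <linux/module.h>
#include <linux/platform_device.h>

static int __init foo_probe(struct platform_device *pdev)
{
	/* One-shot probe for a non-hotpluggable device; may live in __init. */
	return 0;
}

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo",
	},
};

static int __init foo_init(void)
{
	/* Fails with -ENODEV if no device bound during registration,
	 * and any later probe of this driver is forced to fail. */
	return platform_driver_probe(&foo_driver, foo_probe);
}
module_init(foo_init);

MODULE_LICENSE("GPL");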
*/ -static int platform_match(struct device *dev, struct device_driver *drv) +static int platform_match(struct device *dev, const struct device_driver *drv) { struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); @@ -1380,9 +1398,9 @@ static int platform_match(struct device *dev, struct device_driver *drv) return (strcmp(pdev->name, drv->name) == 0); } -static int platform_uevent(struct device *dev, struct kobj_uevent_env *env) +static int platform_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct platform_device *pdev = to_platform_device(dev); + const struct platform_device *pdev = to_platform_device(dev); int rc; /* Some devices have extra OF data and an OF-style MODALIAS */ @@ -1419,15 +1437,13 @@ static int platform_probe(struct device *_dev) if (ret < 0) return ret; - ret = dev_pm_domain_attach(_dev, true); + ret = dev_pm_domain_attach(_dev, PD_FLAG_ATTACH_POWER_ON | + PD_FLAG_DETACH_POWER_OFF); if (ret) goto out; - if (drv->probe) { + if (drv->probe) ret = drv->probe(dev); - if (ret) - dev_pm_domain_detach(_dev, true); - } out: if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { @@ -1438,20 +1454,13 @@ out: return ret; } -static int platform_remove(struct device *_dev) +static void platform_remove(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); - if (drv->remove) { - int ret = drv->remove(dev); - - if (ret) - dev_warn(_dev, "remove callback returned a non-zero value. This will be ignored.\n"); - } - dev_pm_domain_detach(_dev, true); - - return 0; + if (drv->remove) + drv->remove(dev); } static void platform_shutdown(struct device *_dev) @@ -1467,29 +1476,44 @@ static void platform_shutdown(struct device *_dev) drv->shutdown(dev); } - -int platform_dma_configure(struct device *dev) +static int platform_dma_configure(struct device *dev) { + struct device_driver *drv = READ_ONCE(dev->driver); + struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; - if (dev->of_node) { - ret = of_dma_configure(dev, dev->of_node, true); - } else if (has_acpi_companion(dev)) { - attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); + if (is_of_node(fwnode)) { + ret = of_dma_configure(dev, to_of_node(fwnode), true); + } else if (is_acpi_device_node(fwnode)) { + attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } + /* @dev->driver may not be valid when we're called from the IOMMU layer */ + if (ret || !drv || to_platform_driver(drv)->driver_managed_dma) + return ret; + + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); return ret; } +static void platform_dma_cleanup(struct device *dev) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + + if (!drv->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + static const struct dev_pm_ops platform_dev_pm_ops = { - .runtime_suspend = pm_generic_runtime_suspend, - .runtime_resume = pm_generic_runtime_resume, + SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend, pm_generic_runtime_resume, NULL) USE_PLATFORM_PM_SLEEP_OPS }; -struct bus_type platform_bus_type = { +const struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, .match = platform_match, @@ -1498,6 +1522,7 @@ struct bus_type platform_bus_type = { .remove = platform_remove, .shutdown = platform_shutdown, .dma_configure = platform_dma_configure, + 
.dma_cleanup = platform_dma_cleanup, .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); @@ -1537,6 +1562,6 @@ int __init platform_bus_init(void) error = bus_register(&platform_bus_type); if (error) device_unregister(&platform_bus); - of_platform_register_reconfig_notifier(); + return error; } diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 8fdd0073eeeb..2989e42d0161 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -2,8 +2,8 @@ obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o runtime.o wakeirq.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o wakeup_stats.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o -obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o obj-$(CONFIG_PM_QOS_KUNIT_TEST) += qos-test.o +obj-$(CONFIG_PM_RUNTIME_KUNIT_TEST) += runtime-test.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 0251f3e6e61d..b69bcb37c830 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -259,39 +259,6 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk) } EXPORT_SYMBOL_GPL(pm_clk_add_clk); - -/** - * of_pm_clk_add_clk - Start using a device clock for power management. - * @dev: Device whose clock is going to be used for power management. - * @name: Name of clock that is going to be used for power management. - * - * Add the clock described in the 'clocks' device-tree node that matches - * with the 'name' provided, to the list of clocks used for the power - * management of @dev. On success, returns 0. Returns a negative error - * code if the clock is not found or cannot be added. - */ -int of_pm_clk_add_clk(struct device *dev, const char *name) -{ - struct clk *clk; - int ret; - - if (!dev || !dev->of_node || !name) - return -EINVAL; - - clk = of_clk_get_by_name(dev->of_node, name); - if (IS_ERR(clk)) - return PTR_ERR(clk); - - ret = pm_clk_add_clk(dev, clk); - if (ret) { - clk_put(clk); - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); - /** * of_pm_clk_add_clks - Start using device clock(s) for power management. * @dev: Device whose clock(s) is going to be used for power management. @@ -377,46 +344,6 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) } /** - * pm_clk_remove - Stop using a device clock for power management. - * @dev: Device whose clock should not be used for PM any more. - * @con_id: Connection ID of the clock. - * - * Remove the clock represented by @con_id from the list of clocks used for - * the power management of @dev. - */ -void pm_clk_remove(struct device *dev, const char *con_id) -{ - struct pm_subsys_data *psd = dev_to_psd(dev); - struct pm_clock_entry *ce; - - if (!psd) - return; - - pm_clk_list_lock(psd); - - list_for_each_entry(ce, &psd->clock_list, node) { - if (!con_id && !ce->con_id) - goto remove; - else if (!con_id || !ce->con_id) - continue; - else if (!strcmp(con_id, ce->con_id)) - goto remove; - } - - pm_clk_list_unlock(psd); - return; - - remove: - list_del(&ce->node); - if (ce->enabled_when_prepared) - psd->clock_op_might_sleep--; - pm_clk_list_unlock(psd); - - __pm_clk_remove(ce); -} -EXPORT_SYMBOL_GPL(pm_clk_remove); - -/** * pm_clk_remove_clk - Stop using a device clock for power management. * @dev: Device whose clock should not be used for PM any more. 
* @clk: Clock pointer @@ -519,6 +446,23 @@ void pm_clk_destroy(struct device *dev) } EXPORT_SYMBOL_GPL(pm_clk_destroy); +static void pm_clk_destroy_action(void *data) +{ + pm_clk_destroy(data); +} + +int devm_pm_clk_create(struct device *dev) +{ + int ret; + + ret = pm_clk_create(dev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, pm_clk_destroy_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_clk_create); + /** * pm_clk_suspend - Disable clocks in a device's PM clock list. * @dev: Device to disable the clocks for. @@ -776,7 +720,7 @@ static int pm_clk_notify(struct notifier_block *nb, * the remaining members of @clknb should be populated prior to calling this * routine. */ -void pm_clk_add_notifier(struct bus_type *bus, +void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb) { if (!bus || !clknb) diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index bbddb267c2e6..6ecf9ce4a4e6 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -11,6 +11,7 @@ #include <linux/pm_clock.h> #include <linux/acpi.h> #include <linux/pm_domain.h> +#include <linux/pm_opp.h> #include "power.h" @@ -82,7 +83,7 @@ EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); /** * dev_pm_domain_attach - Attach a device to its PM domain. * @dev: Device to attach. - * @power_on: Used to indicate whether we should power on the device. + * @flags: indicate whether we should power on/off the device on attach/detach * * The @dev may only be attached to a single PM domain. By iterating through * the available alternatives we try to find a valid PM domain for the device. @@ -99,17 +100,20 @@ EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); * Returns 0 on successfully attached PM domain, or when it is found that the * device doesn't need a PM domain, else a negative error code. */ -int dev_pm_domain_attach(struct device *dev, bool power_on) +int dev_pm_domain_attach(struct device *dev, u32 flags) { int ret; if (dev->pm_domain) return 0; - ret = acpi_dev_pm_attach(dev, power_on); + ret = acpi_dev_pm_attach(dev, !!(flags & PD_FLAG_ATTACH_POWER_ON)); if (!ret) ret = genpd_dev_pm_attach(dev); + if (dev->pm_domain) + dev->power.detach_power_off = !!(flags & PD_FLAG_DETACH_POWER_OFF); + return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach); @@ -168,14 +172,187 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name); /** + * dev_pm_domain_attach_list - Associate a device with its PM domains. + * @dev: The device used to lookup the PM domains for. + * @data: The data used for attaching to the PM domains. + * @list: An out-parameter with an allocated list of attached PM domains. + * + * This function helps to attach a device to its multiple PM domains. The + * caller, which is typically a driver's probe function, may provide a list of + * names for the PM domains that we should try to attach the device to, but it + * may also provide an empty list, in case the attach should be done for all of + * the available PM domains. + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. + * + * Returns the number of attached PM domains or a negative error code in case of + * a failure. Note that, to detach the list of PM domains, the driver shall call + * dev_pm_domain_detach_list(), typically during the remove phase. 
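Back in clock_ops.c, the new devm_pm_clk_create() removes the need for an explicit pm_clk_destroy() in drivers. A probe-path sketch, mirroring the clk_get()/pm_clk_add_clk()/clk_put() pattern of the removed of_pm_clk_add_clk(); the clock name and driver are invented:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/pm_clock.h>

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk;
	int ret;

	/* The PM clock list is torn down by devres when the driver unbinds. */
	ret = devm_pm_clk_create(&pdev->dev);
	if (ret)
		return ret;

	clk = clk_get(&pdev->dev, "bus");
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* On success the PM clock list owns the reference and puts it when
	 * the entry is removed; only drop it ourselves on failure. */
	ret = pm_clk_add_clk(&pdev->dev, clk);
	if (ret)
		clk_put(clk);
	return ret;
}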
+ */ +int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + struct device_node *np = dev->of_node; + struct dev_pm_domain_list *pds; + struct device *pd_dev = NULL; + int ret, i, num_pds = 0; + bool by_id = true; + size_t size; + u32 pd_flags = data ? data->pd_flags : 0; + u32 link_flags = pd_flags & PD_FLAG_NO_DEV_LINK ? 0 : + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; + + if (dev->pm_domain) + return -EEXIST; + + /* For now this is limited to OF based platforms. */ + if (!np) + return 0; + + if (data && data->pd_names) { + num_pds = data->num_pd_names; + by_id = false; + } else { + num_pds = of_count_phandle_with_args(np, "power-domains", + "#power-domain-cells"); + } + + if (num_pds <= 0) + return 0; + + pds = kzalloc(sizeof(*pds), GFP_KERNEL); + if (!pds) + return -ENOMEM; + + size = sizeof(*pds->pd_devs) + sizeof(*pds->pd_links) + + sizeof(*pds->opp_tokens); + pds->pd_devs = kcalloc(num_pds, size, GFP_KERNEL); + if (!pds->pd_devs) { + ret = -ENOMEM; + goto free_pds; + } + pds->pd_links = (void *)(pds->pd_devs + num_pds); + pds->opp_tokens = (void *)(pds->pd_links + num_pds); + + if (link_flags && pd_flags & PD_FLAG_DEV_LINK_ON) + link_flags |= DL_FLAG_RPM_ACTIVE; + + for (i = 0; i < num_pds; i++) { + if (by_id) + pd_dev = dev_pm_domain_attach_by_id(dev, i); + else + pd_dev = dev_pm_domain_attach_by_name(dev, + data->pd_names[i]); + if (IS_ERR_OR_NULL(pd_dev)) { + ret = pd_dev ? PTR_ERR(pd_dev) : -ENODEV; + goto err_attach; + } + + if (pd_flags & PD_FLAG_REQUIRED_OPP) { + struct dev_pm_opp_config config = { + .required_dev = pd_dev, + .required_dev_index = i, + }; + + ret = dev_pm_opp_set_config(dev, &config); + if (ret < 0) + goto err_link; + + pds->opp_tokens[i] = ret; + } + + if (link_flags) { + struct device_link *link; + + link = device_link_add(dev, pd_dev, link_flags); + if (!link) { + ret = -ENODEV; + goto err_link; + } + + pds->pd_links[i] = link; + } + + pds->pd_devs[i] = pd_dev; + } + + pds->num_pds = num_pds; + *list = pds; + return num_pds; + +err_link: + dev_pm_opp_clear_config(pds->opp_tokens[i]); + dev_pm_domain_detach(pd_dev, true); +err_attach: + while (--i >= 0) { + dev_pm_opp_clear_config(pds->opp_tokens[i]); + if (pds->pd_links[i]) + device_link_del(pds->pd_links[i]); + dev_pm_domain_detach(pds->pd_devs[i], true); + } + kfree(pds->pd_devs); +free_pds: + kfree(pds); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_domain_attach_list); + +/** + * devm_pm_domain_detach_list - devres-enabled version of dev_pm_domain_detach_list. + * @_list: The list of PM domains to detach. + * + * This function reverse the actions from devm_pm_domain_attach_list(). + * it will be invoked during the remove phase from drivers implicitly if driver + * uses devm_pm_domain_attach_list() to attach the PM domains. + */ +static void devm_pm_domain_detach_list(void *_list) +{ + struct dev_pm_domain_list *list = _list; + + dev_pm_domain_detach_list(list); +} + +/** + * devm_pm_domain_attach_list - devres-enabled version of dev_pm_domain_attach_list + * @dev: The device used to lookup the PM domains for. + * @data: The data used for attaching to the PM domains. + * @list: An out-parameter with an allocated list of attached PM domains. + * + * NOTE: this will also handle calling devm_pm_domain_detach_list() for + * you during remove phase. + * + * Returns the number of attached PM domains or a negative error code in case of + * a failure. 
+ */ +int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + int ret, num_pds; + + num_pds = dev_pm_domain_attach_list(dev, data, list); + if (num_pds <= 0) + return num_pds; + + ret = devm_add_action_or_reset(dev, devm_pm_domain_detach_list, *list); + if (ret) + return ret; + + return num_pds; +} +EXPORT_SYMBOL_GPL(devm_pm_domain_attach_list); + +/** * dev_pm_domain_detach - Detach a device from its PM domain. * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * - * This functions will reverse the actions from dev_pm_domain_attach() and - * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain. - * Typically it should be invoked during the remove phase, either from - * subsystem level code or from drivers. + * This functions will reverse the actions from dev_pm_domain_attach(), + * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it + * detaches @dev from its PM domain. Typically it should be invoked during the + * remove phase, either from subsystem level code or from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks. @@ -188,6 +365,35 @@ void dev_pm_domain_detach(struct device *dev, bool power_off) EXPORT_SYMBOL_GPL(dev_pm_domain_detach); /** + * dev_pm_domain_detach_list - Detach a list of PM domains. + * @list: The list of PM domains to detach. + * + * This function reverse the actions from dev_pm_domain_attach_list(). + * Typically it should be invoked during the remove phase from drivers. + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. + */ +void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) +{ + int i; + + if (!list) + return; + + for (i = 0; i < list->num_pds; i++) { + dev_pm_opp_clear_config(list->opp_tokens[i]); + if (list->pd_links[i]) + device_link_del(list->pd_links[i]); + dev_pm_domain_detach(list->pd_devs[i], true); + } + + kfree(list->pd_devs); + kfree(list); +} +EXPORT_SYMBOL_GPL(dev_pm_domain_detach_list); + +/** * dev_pm_domain_start - Start the device through its PM domain. * @dev: Device to start. * @@ -228,3 +434,24 @@ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) device_pm_check_callbacks(dev); } EXPORT_SYMBOL_GPL(dev_pm_domain_set); + +/** + * dev_pm_domain_set_performance_state - Request a new performance state. + * @dev: The device to make the request for. + * @state: Target performance state for the device. + * + * This function should be called when a new performance state needs to be + * requested for a device that is attached to a PM domain. Note that, the + * support for performance scaling for PM domains is optional. + * + * Returns 0 on success and when performance scaling isn't supported, negative + * error code on failure. + */ +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state) +{ + if (dev->pm_domain && dev->pm_domain->set_performance_state) + return dev->pm_domain->set_performance_state(dev, state); + + return 0; +} +EXPORT_SYMBOL_GPL(dev_pm_domain_set_performance_state); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c deleted file mode 100644 index a934c679e6ce..000000000000 --- a/drivers/base/power/domain.c +++ /dev/null @@ -1,3294 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * drivers/base/power/domain.c - Common code related to device power domains. 
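Before the old domain.c listing, a consumer-side sketch of the multi-domain attach helpers added to common.c above; the domain names are placeholders that would have to match power-domain-names in the device tree, and only the structure fields, flag and function signatures come from the code above:

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_domain.h>

static int foo_probe(struct platform_device *pdev)
{
	static const char * const pd_names[] = { "perf", "mem" };
	const struct dev_pm_domain_attach_data pd_data = {
		.pd_names = pd_names,
		.num_pd_names = ARRAY_SIZE(pd_names),
		.pd_flags = PD_FLAG_DEV_LINK_ON,
	};
	struct dev_pm_domain_list *pd_list;
	int ret;

	/* Detach (and device-link removal) is handled by devres on unbind. */
	ret = devm_pm_domain_attach_list(&pdev->dev, &pd_data, &pd_list);
	if (ret < 0)
		return ret;

	/* 0 means no extra domains were described; >0 is the number attached. */
	return 0;
}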
- * - * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. - */ -#define pr_fmt(fmt) "PM: " fmt - -#include <linux/delay.h> -#include <linux/kernel.h> -#include <linux/io.h> -#include <linux/platform_device.h> -#include <linux/pm_opp.h> -#include <linux/pm_runtime.h> -#include <linux/pm_domain.h> -#include <linux/pm_qos.h> -#include <linux/pm_clock.h> -#include <linux/slab.h> -#include <linux/err.h> -#include <linux/sched.h> -#include <linux/suspend.h> -#include <linux/export.h> -#include <linux/cpu.h> -#include <linux/debugfs.h> - -#include "power.h" - -#define GENPD_RETRY_MAX_MS 250 /* Approximate */ - -#define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ -({ \ - type (*__routine)(struct device *__d); \ - type __ret = (type)0; \ - \ - __routine = genpd->dev_ops.callback; \ - if (__routine) { \ - __ret = __routine(dev); \ - } \ - __ret; \ -}) - -static LIST_HEAD(gpd_list); -static DEFINE_MUTEX(gpd_list_lock); - -struct genpd_lock_ops { - void (*lock)(struct generic_pm_domain *genpd); - void (*lock_nested)(struct generic_pm_domain *genpd, int depth); - int (*lock_interruptible)(struct generic_pm_domain *genpd); - void (*unlock)(struct generic_pm_domain *genpd); -}; - -static void genpd_lock_mtx(struct generic_pm_domain *genpd) -{ - mutex_lock(&genpd->mlock); -} - -static void genpd_lock_nested_mtx(struct generic_pm_domain *genpd, - int depth) -{ - mutex_lock_nested(&genpd->mlock, depth); -} - -static int genpd_lock_interruptible_mtx(struct generic_pm_domain *genpd) -{ - return mutex_lock_interruptible(&genpd->mlock); -} - -static void genpd_unlock_mtx(struct generic_pm_domain *genpd) -{ - return mutex_unlock(&genpd->mlock); -} - -static const struct genpd_lock_ops genpd_mtx_ops = { - .lock = genpd_lock_mtx, - .lock_nested = genpd_lock_nested_mtx, - .lock_interruptible = genpd_lock_interruptible_mtx, - .unlock = genpd_unlock_mtx, -}; - -static void genpd_lock_spin(struct generic_pm_domain *genpd) - __acquires(&genpd->slock) -{ - unsigned long flags; - - spin_lock_irqsave(&genpd->slock, flags); - genpd->lock_flags = flags; -} - -static void genpd_lock_nested_spin(struct generic_pm_domain *genpd, - int depth) - __acquires(&genpd->slock) -{ - unsigned long flags; - - spin_lock_irqsave_nested(&genpd->slock, flags, depth); - genpd->lock_flags = flags; -} - -static int genpd_lock_interruptible_spin(struct generic_pm_domain *genpd) - __acquires(&genpd->slock) -{ - unsigned long flags; - - spin_lock_irqsave(&genpd->slock, flags); - genpd->lock_flags = flags; - return 0; -} - -static void genpd_unlock_spin(struct generic_pm_domain *genpd) - __releases(&genpd->slock) -{ - spin_unlock_irqrestore(&genpd->slock, genpd->lock_flags); -} - -static const struct genpd_lock_ops genpd_spin_ops = { - .lock = genpd_lock_spin, - .lock_nested = genpd_lock_nested_spin, - .lock_interruptible = genpd_lock_interruptible_spin, - .unlock = genpd_unlock_spin, -}; - -#define genpd_lock(p) p->lock_ops->lock(p) -#define genpd_lock_nested(p, d) p->lock_ops->lock_nested(p, d) -#define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) -#define genpd_unlock(p) p->lock_ops->unlock(p) - -#define genpd_status_on(genpd) (genpd->status == GENPD_STATE_ON) -#define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) -#define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) -#define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) -#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) -#define genpd_is_rpm_always_on(genpd) 
(genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON) - -static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, - const struct generic_pm_domain *genpd) -{ - bool ret; - - ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd); - - /* - * Warn once if an IRQ safe device is attached to a no sleep domain, as - * to indicate a suboptimal configuration for PM. For an always on - * domain this isn't case, thus don't warn. - */ - if (ret && !genpd_is_always_on(genpd)) - dev_warn_once(dev, "PM domain %s will not be powered off\n", - genpd->name); - - return ret; -} - -static int genpd_runtime_suspend(struct device *dev); - -/* - * Get the generic PM domain for a particular struct device. - * This validates the struct device pointer, the PM domain pointer, - * and checks that the PM domain pointer is a real generic PM domain. - * Any failure results in NULL being returned. - */ -static struct generic_pm_domain *dev_to_genpd_safe(struct device *dev) -{ - if (IS_ERR_OR_NULL(dev) || IS_ERR_OR_NULL(dev->pm_domain)) - return NULL; - - /* A genpd's always have its ->runtime_suspend() callback assigned. */ - if (dev->pm_domain->ops.runtime_suspend == genpd_runtime_suspend) - return pd_to_genpd(dev->pm_domain); - - return NULL; -} - -/* - * This should only be used where we are certain that the pm_domain - * attached to the device is a genpd domain. - */ -static struct generic_pm_domain *dev_to_genpd(struct device *dev) -{ - if (IS_ERR_OR_NULL(dev->pm_domain)) - return ERR_PTR(-EINVAL); - - return pd_to_genpd(dev->pm_domain); -} - -static int genpd_stop_dev(const struct generic_pm_domain *genpd, - struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, stop, dev); -} - -static int genpd_start_dev(const struct generic_pm_domain *genpd, - struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, start, dev); -} - -static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) -{ - bool ret = false; - - if (!WARN_ON(atomic_read(&genpd->sd_count) == 0)) - ret = !!atomic_dec_and_test(&genpd->sd_count); - - return ret; -} - -static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) -{ - atomic_inc(&genpd->sd_count); - smp_mb__after_atomic(); -} - -#ifdef CONFIG_DEBUG_FS -static struct dentry *genpd_debugfs_dir; - -static void genpd_debug_add(struct generic_pm_domain *genpd); - -static void genpd_debug_remove(struct generic_pm_domain *genpd) -{ - struct dentry *d; - - d = debugfs_lookup(genpd->name, genpd_debugfs_dir); - debugfs_remove(d); -} - -static void genpd_update_accounting(struct generic_pm_domain *genpd) -{ - ktime_t delta, now; - - now = ktime_get(); - delta = ktime_sub(now, genpd->accounting_time); - - /* - * If genpd->status is active, it means we are just - * out of off and so update the idle time and vice - * versa. 
- */ - if (genpd->status == GENPD_STATE_ON) { - int state_idx = genpd->state_idx; - - genpd->states[state_idx].idle_time = - ktime_add(genpd->states[state_idx].idle_time, delta); - } else { - genpd->on_time = ktime_add(genpd->on_time, delta); - } - - genpd->accounting_time = now; -} -#else -static inline void genpd_debug_add(struct generic_pm_domain *genpd) {} -static inline void genpd_debug_remove(struct generic_pm_domain *genpd) {} -static inline void genpd_update_accounting(struct generic_pm_domain *genpd) {} -#endif - -static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, - unsigned int state) -{ - struct generic_pm_domain_data *pd_data; - struct pm_domain_data *pdd; - struct gpd_link *link; - - /* New requested state is same as Max requested state */ - if (state == genpd->performance_state) - return state; - - /* New requested state is higher than Max requested state */ - if (state > genpd->performance_state) - return state; - - /* Traverse all devices within the domain */ - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - pd_data = to_gpd_data(pdd); - - if (pd_data->performance_state > state) - state = pd_data->performance_state; - } - - /* - * Traverse all sub-domains within the domain. This can be - * done without any additional locking as the link->performance_state - * field is protected by the parent genpd->lock, which is already taken. - * - * Also note that link->performance_state (subdomain's performance state - * requirement to parent domain) is different from - * link->child->performance_state (current performance state requirement - * of the devices/sub-domains of the subdomain) and so can have a - * different value. - * - * Note that we also take vote from powered-off sub-domains into account - * as the same is done for devices right now. 
- */ - list_for_each_entry(link, &genpd->parent_links, parent_node) { - if (link->performance_state > state) - state = link->performance_state; - } - - return state; -} - -static int genpd_xlate_performance_state(struct generic_pm_domain *genpd, - struct generic_pm_domain *parent, - unsigned int pstate) -{ - if (!parent->set_performance_state) - return pstate; - - return dev_pm_opp_xlate_performance_state(genpd->opp_table, - parent->opp_table, - pstate); -} - -static int _genpd_set_performance_state(struct generic_pm_domain *genpd, - unsigned int state, int depth) -{ - struct generic_pm_domain *parent; - struct gpd_link *link; - int parent_state, ret; - - if (state == genpd->performance_state) - return 0; - - /* Propagate to parents of genpd */ - list_for_each_entry(link, &genpd->child_links, child_node) { - parent = link->parent; - - /* Find parent's performance state */ - ret = genpd_xlate_performance_state(genpd, parent, state); - if (unlikely(ret < 0)) - goto err; - - parent_state = ret; - - genpd_lock_nested(parent, depth + 1); - - link->prev_performance_state = link->performance_state; - link->performance_state = parent_state; - parent_state = _genpd_reeval_performance_state(parent, - parent_state); - ret = _genpd_set_performance_state(parent, parent_state, depth + 1); - if (ret) - link->performance_state = link->prev_performance_state; - - genpd_unlock(parent); - - if (ret) - goto err; - } - - if (genpd->set_performance_state) { - ret = genpd->set_performance_state(genpd, state); - if (ret) - goto err; - } - - genpd->performance_state = state; - return 0; - -err: - /* Encountered an error, lets rollback */ - list_for_each_entry_continue_reverse(link, &genpd->child_links, - child_node) { - parent = link->parent; - - genpd_lock_nested(parent, depth + 1); - - parent_state = link->prev_performance_state; - link->performance_state = parent_state; - - parent_state = _genpd_reeval_performance_state(parent, - parent_state); - if (_genpd_set_performance_state(parent, parent_state, depth + 1)) { - pr_err("%s: Failed to roll back to %d performance state\n", - parent->name, parent_state); - } - - genpd_unlock(parent); - } - - return ret; -} - -static int genpd_set_performance_state(struct device *dev, unsigned int state) -{ - struct generic_pm_domain *genpd = dev_to_genpd(dev); - struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); - unsigned int prev_state; - int ret; - - prev_state = gpd_data->performance_state; - if (prev_state == state) - return 0; - - gpd_data->performance_state = state; - state = _genpd_reeval_performance_state(genpd, state); - - ret = _genpd_set_performance_state(genpd, state, 0); - if (ret) - gpd_data->performance_state = prev_state; - - return ret; -} - -static int genpd_drop_performance_state(struct device *dev) -{ - unsigned int prev_state = dev_gpd_data(dev)->performance_state; - - if (!genpd_set_performance_state(dev, 0)) - return prev_state; - - return 0; -} - -static void genpd_restore_performance_state(struct device *dev, - unsigned int state) -{ - if (state) - genpd_set_performance_state(dev, state); -} - -/** - * dev_pm_genpd_set_performance_state- Set performance state of device's power - * domain. - * - * @dev: Device for which the performance-state needs to be set. - * @state: Target performance state of the device. This can be set as 0 when the - * device doesn't have any performance state constraints left (And so - * the device wouldn't participate anymore to find the target - * performance state of the genpd). 
- * - * It is assumed that the users guarantee that the genpd wouldn't be detached - * while this routine is getting called. - * - * Returns 0 on success and negative error values on failures. - */ -int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) -{ - struct generic_pm_domain *genpd; - int ret; - - genpd = dev_to_genpd_safe(dev); - if (!genpd) - return -ENODEV; - - if (WARN_ON(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) - return -EINVAL; - - genpd_lock(genpd); - ret = genpd_set_performance_state(dev, state); - genpd_unlock(genpd); - - return ret; -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state); - -/** - * dev_pm_genpd_set_next_wakeup - Notify PM framework of an impending wakeup. - * - * @dev: Device to handle - * @next: impending interrupt/wakeup for the device - * - * - * Allow devices to inform of the next wakeup. It's assumed that the users - * guarantee that the genpd wouldn't be detached while this routine is getting - * called. Additionally, it's also assumed that @dev isn't runtime suspended - * (RPM_SUSPENDED)." - * Although devices are expected to update the next_wakeup after the end of - * their usecase as well, it is possible the devices themselves may not know - * about that, so stale @next will be ignored when powering off the domain. - */ -void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) -{ - struct generic_pm_domain_data *gpd_data; - struct generic_pm_domain *genpd; - - genpd = dev_to_genpd_safe(dev); - if (!genpd) - return; - - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - gpd_data->next_wakeup = next; -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup); - -static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) -{ - unsigned int state_idx = genpd->state_idx; - ktime_t time_start; - s64 elapsed_ns; - int ret; - - /* Notify consumers that we are about to power on. */ - ret = raw_notifier_call_chain_robust(&genpd->power_notifiers, - GENPD_NOTIFY_PRE_ON, - GENPD_NOTIFY_OFF, NULL); - ret = notifier_to_errno(ret); - if (ret) - return ret; - - if (!genpd->power_on) - goto out; - - if (!timed) { - ret = genpd->power_on(genpd); - if (ret) - goto err; - - goto out; - } - - time_start = ktime_get(); - ret = genpd->power_on(genpd); - if (ret) - goto err; - - elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns) - goto out; - - genpd->states[state_idx].power_on_latency_ns = elapsed_ns; - genpd->max_off_time_changed = true; - pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", - genpd->name, "on", elapsed_ns); - -out: - raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_ON, NULL); - return 0; -err: - raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_OFF, - NULL); - return ret; -} - -static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed) -{ - unsigned int state_idx = genpd->state_idx; - ktime_t time_start; - s64 elapsed_ns; - int ret; - - /* Notify consumers that we are about to power off. 
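As a brief illustrative aside to the interface documented above: a sketch of how a consumer might vote for a genpd performance state. The foo_enter_turbo() helper and the level value 3 are assumptions for illustration; drivers with OPP tables typically set performance levels indirectly through the OPP framework instead.

#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>

/* Hypothetical helper: raise the domain's performance level while busy. */
static int foo_enter_turbo(struct device *dev)
{
	int ret;

	ret = pm_runtime_resume_and_get(dev);
	if (ret)
		return ret;

	/* Vote for level 3; voting 0 later drops this device's constraint. */
	ret = dev_pm_genpd_set_performance_state(dev, 3);
	if (ret)
		pm_runtime_put(dev);

	return ret;
}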
*/ - ret = raw_notifier_call_chain_robust(&genpd->power_notifiers, - GENPD_NOTIFY_PRE_OFF, - GENPD_NOTIFY_ON, NULL); - ret = notifier_to_errno(ret); - if (ret) - return ret; - - if (!genpd->power_off) - goto out; - - if (!timed) { - ret = genpd->power_off(genpd); - if (ret) - goto busy; - - goto out; - } - - time_start = ktime_get(); - ret = genpd->power_off(genpd); - if (ret) - goto busy; - - elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->states[state_idx].power_off_latency_ns) - goto out; - - genpd->states[state_idx].power_off_latency_ns = elapsed_ns; - genpd->max_off_time_changed = true; - pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", - genpd->name, "off", elapsed_ns); - -out: - raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_OFF, - NULL); - return 0; -busy: - raw_notifier_call_chain(&genpd->power_notifiers, GENPD_NOTIFY_ON, NULL); - return ret; -} - -/** - * genpd_queue_power_off_work - Queue up the execution of genpd_power_off(). - * @genpd: PM domain to power off. - * - * Queue up the execution of genpd_power_off() unless it's already been done - * before. - */ -static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) -{ - queue_work(pm_wq, &genpd->power_off_work); -} - -/** - * genpd_power_off - Remove power from a given PM domain. - * @genpd: PM domain to power down. - * @one_dev_on: If invoked from genpd's ->runtime_suspend|resume() callback, the - * RPM status of the releated device is in an intermediate state, not yet turned - * into RPM_SUSPENDED. This means genpd_power_off() must allow one device to not - * be RPM_SUSPENDED, while it tries to power off the PM domain. - * @depth: nesting count for lockdep. - * - * If all of the @genpd's devices have been suspended and all of its subdomains - * have been powered down, remove power from @genpd. - */ -static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, - unsigned int depth) -{ - struct pm_domain_data *pdd; - struct gpd_link *link; - unsigned int not_suspended = 0; - int ret; - - /* - * Do not try to power off the domain in the following situations: - * (1) The domain is already in the "power off" state. - * (2) System suspend is in progress. - */ - if (!genpd_status_on(genpd) || genpd->prepared_count > 0) - return 0; - - /* - * Abort power off for the PM domain in the following situations: - * (1) The domain is configured as always on. - * (2) When the domain has a subdomain being powered on. - */ - if (genpd_is_always_on(genpd) || - genpd_is_rpm_always_on(genpd) || - atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - enum pm_qos_flags_status stat; - - stat = dev_pm_qos_flags(pdd->dev, PM_QOS_FLAG_NO_POWER_OFF); - if (stat > PM_QOS_FLAGS_NONE) - return -EBUSY; - - /* - * Do not allow PM domain to be powered off, when an IRQ safe - * device is part of a non-IRQ safe domain. - */ - if (!pm_runtime_suspended(pdd->dev) || - irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) - not_suspended++; - } - - if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on)) - return -EBUSY; - - if (genpd->gov && genpd->gov->power_down_ok) { - if (!genpd->gov->power_down_ok(&genpd->domain)) - return -EAGAIN; - } - - /* Default to shallowest state. */ - if (!genpd->gov) - genpd->state_idx = 0; - - /* Don't power off, if a child domain is waiting to power on. 
*/ - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - ret = _genpd_power_off(genpd, true); - if (ret) { - genpd->states[genpd->state_idx].rejected++; - return ret; - } - - genpd->status = GENPD_STATE_OFF; - genpd_update_accounting(genpd); - genpd->states[genpd->state_idx].usage++; - - list_for_each_entry(link, &genpd->child_links, child_node) { - genpd_sd_counter_dec(link->parent); - genpd_lock_nested(link->parent, depth + 1); - genpd_power_off(link->parent, false, depth + 1); - genpd_unlock(link->parent); - } - - return 0; -} - -/** - * genpd_power_on - Restore power to a given PM domain and its parents. - * @genpd: PM domain to power up. - * @depth: nesting count for lockdep. - * - * Restore power to @genpd and all of its parents so that it is possible to - * resume a device belonging to it. - */ -static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) -{ - struct gpd_link *link; - int ret = 0; - - if (genpd_status_on(genpd)) - return 0; - - /* - * The list is guaranteed not to change while the loop below is being - * executed, unless one of the parents' .power_on() callbacks fiddles - * with it. - */ - list_for_each_entry(link, &genpd->child_links, child_node) { - struct generic_pm_domain *parent = link->parent; - - genpd_sd_counter_inc(parent); - - genpd_lock_nested(parent, depth + 1); - ret = genpd_power_on(parent, depth + 1); - genpd_unlock(parent); - - if (ret) { - genpd_sd_counter_dec(parent); - goto err; - } - } - - ret = _genpd_power_on(genpd, true); - if (ret) - goto err; - - genpd->status = GENPD_STATE_ON; - genpd_update_accounting(genpd); - - return 0; - - err: - list_for_each_entry_continue_reverse(link, - &genpd->child_links, - child_node) { - genpd_sd_counter_dec(link->parent); - genpd_lock_nested(link->parent, depth + 1); - genpd_power_off(link->parent, false, depth + 1); - genpd_unlock(link->parent); - } - - return ret; -} - -static int genpd_dev_pm_start(struct device *dev) -{ - struct generic_pm_domain *genpd = dev_to_genpd(dev); - - return genpd_start_dev(genpd, dev); -} - -static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, - unsigned long val, void *ptr) -{ - struct generic_pm_domain_data *gpd_data; - struct device *dev; - - gpd_data = container_of(nb, struct generic_pm_domain_data, nb); - dev = gpd_data->base.dev; - - for (;;) { - struct generic_pm_domain *genpd; - struct pm_domain_data *pdd; - - spin_lock_irq(&dev->power.lock); - - pdd = dev->power.subsys_data ? - dev->power.subsys_data->domain_data : NULL; - if (pdd) { - to_gpd_data(pdd)->td.constraint_changed = true; - genpd = dev_to_genpd(dev); - } else { - genpd = ERR_PTR(-ENODATA); - } - - spin_unlock_irq(&dev->power.lock); - - if (!IS_ERR(genpd)) { - genpd_lock(genpd); - genpd->max_off_time_changed = true; - genpd_unlock(genpd); - } - - dev = dev->parent; - if (!dev || dev->power.ignore_children) - break; - } - - return NOTIFY_DONE; -} - -/** - * genpd_power_off_work_fn - Power off PM domain whose subdomain count is 0. - * @work: Work structure used for scheduling the execution of this function. - */ -static void genpd_power_off_work_fn(struct work_struct *work) -{ - struct generic_pm_domain *genpd; - - genpd = container_of(work, struct generic_pm_domain, power_off_work); - - genpd_lock(genpd); - genpd_power_off(genpd, false, 0); - genpd_unlock(genpd); -} - -/** - * __genpd_runtime_suspend - walk the hierarchy of ->runtime_suspend() callbacks - * @dev: Device to handle. 
- */ -static int __genpd_runtime_suspend(struct device *dev) -{ - int (*cb)(struct device *__dev); - - if (dev->type && dev->type->pm) - cb = dev->type->pm->runtime_suspend; - else if (dev->class && dev->class->pm) - cb = dev->class->pm->runtime_suspend; - else if (dev->bus && dev->bus->pm) - cb = dev->bus->pm->runtime_suspend; - else - cb = NULL; - - if (!cb && dev->driver && dev->driver->pm) - cb = dev->driver->pm->runtime_suspend; - - return cb ? cb(dev) : 0; -} - -/** - * __genpd_runtime_resume - walk the hierarchy of ->runtime_resume() callbacks - * @dev: Device to handle. - */ -static int __genpd_runtime_resume(struct device *dev) -{ - int (*cb)(struct device *__dev); - - if (dev->type && dev->type->pm) - cb = dev->type->pm->runtime_resume; - else if (dev->class && dev->class->pm) - cb = dev->class->pm->runtime_resume; - else if (dev->bus && dev->bus->pm) - cb = dev->bus->pm->runtime_resume; - else - cb = NULL; - - if (!cb && dev->driver && dev->driver->pm) - cb = dev->driver->pm->runtime_resume; - - return cb ? cb(dev) : 0; -} - -/** - * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain. - * @dev: Device to suspend. - * - * Carry out a runtime suspend of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int genpd_runtime_suspend(struct device *dev) -{ - struct generic_pm_domain *genpd; - bool (*suspend_ok)(struct device *__dev); - struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); - struct gpd_timing_data *td = &gpd_data->td; - bool runtime_pm = pm_runtime_enabled(dev); - ktime_t time_start; - s64 elapsed_ns; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - /* - * A runtime PM centric subsystem/driver may re-use the runtime PM - * callbacks for other purposes than runtime PM. In those scenarios - * runtime PM is disabled. Under these circumstances, we shall skip - * validating/measuring the PM QoS latency. - */ - suspend_ok = genpd->gov ? genpd->gov->suspend_ok : NULL; - if (runtime_pm && suspend_ok && !suspend_ok(dev)) - return -EBUSY; - - /* Measure suspend latency. */ - time_start = 0; - if (runtime_pm) - time_start = ktime_get(); - - ret = __genpd_runtime_suspend(dev); - if (ret) - return ret; - - ret = genpd_stop_dev(genpd, dev); - if (ret) { - __genpd_runtime_resume(dev); - return ret; - } - - /* Update suspend latency value if the measured time exceeds it. */ - if (runtime_pm) { - elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > td->suspend_latency_ns) { - td->suspend_latency_ns = elapsed_ns; - dev_dbg(dev, "suspend latency exceeded, %lld ns\n", - elapsed_ns); - genpd->max_off_time_changed = true; - td->constraint_changed = true; - } - } - - /* - * If power.irq_safe is set, this routine may be run with - * IRQs disabled, so suspend only if the PM domain also is irq_safe. - */ - if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) - return 0; - - genpd_lock(genpd); - gpd_data->rpm_pstate = genpd_drop_performance_state(dev); - genpd_power_off(genpd, true, 0); - genpd_unlock(genpd); - - return 0; -} - -/** - * genpd_runtime_resume - Resume a device belonging to I/O PM domain. - * @dev: Device to resume. 
- * - * Carry out a runtime resume of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int genpd_runtime_resume(struct device *dev) -{ - struct generic_pm_domain *genpd; - struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); - struct gpd_timing_data *td = &gpd_data->td; - bool runtime_pm = pm_runtime_enabled(dev); - ktime_t time_start; - s64 elapsed_ns; - int ret; - bool timed = true; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - /* - * As we don't power off a non IRQ safe domain, which holds - * an IRQ safe device, we don't need to restore power to it. - */ - if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) { - timed = false; - goto out; - } - - genpd_lock(genpd); - ret = genpd_power_on(genpd, 0); - if (!ret) - genpd_restore_performance_state(dev, gpd_data->rpm_pstate); - genpd_unlock(genpd); - - if (ret) - return ret; - - out: - /* Measure resume latency. */ - time_start = 0; - if (timed && runtime_pm) - time_start = ktime_get(); - - ret = genpd_start_dev(genpd, dev); - if (ret) - goto err_poweroff; - - ret = __genpd_runtime_resume(dev); - if (ret) - goto err_stop; - - /* Update resume latency value if the measured time exceeds it. */ - if (timed && runtime_pm) { - elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > td->resume_latency_ns) { - td->resume_latency_ns = elapsed_ns; - dev_dbg(dev, "resume latency exceeded, %lld ns\n", - elapsed_ns); - genpd->max_off_time_changed = true; - td->constraint_changed = true; - } - } - - return 0; - -err_stop: - genpd_stop_dev(genpd, dev); -err_poweroff: - if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) { - genpd_lock(genpd); - gpd_data->rpm_pstate = genpd_drop_performance_state(dev); - genpd_power_off(genpd, true, 0); - genpd_unlock(genpd); - } - - return ret; -} - -static bool pd_ignore_unused; -static int __init pd_ignore_unused_setup(char *__unused) -{ - pd_ignore_unused = true; - return 1; -} -__setup("pd_ignore_unused", pd_ignore_unused_setup); - -/** - * genpd_power_off_unused - Power off all PM domains with no devices in use. - */ -static int __init genpd_power_off_unused(void) -{ - struct generic_pm_domain *genpd; - - if (pd_ignore_unused) { - pr_warn("genpd: Not disabling unused power domains\n"); - return 0; - } - - mutex_lock(&gpd_list_lock); - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) - genpd_queue_power_off_work(genpd); - - mutex_unlock(&gpd_list_lock); - - return 0; -} -late_initcall(genpd_power_off_unused); - -#ifdef CONFIG_PM_SLEEP - -/** - * genpd_sync_power_off - Synchronously power off a PM domain and its parents. - * @genpd: PM domain to power off, if possible. - * @use_lock: use the lock. - * @depth: nesting count for lockdep. - * - * Check if the given PM domain can be powered off (during system suspend or - * hibernation) and do that if so. Also, in that case propagate to its parents. - * - * This function is only called in "noirq" and "syscore" stages of system power - * transitions. The "noirq" callbacks may be executed asynchronously, thus in - * these cases the lock must be held. 
- */ -static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, - unsigned int depth) -{ - struct gpd_link *link; - - if (!genpd_status_on(genpd) || genpd_is_always_on(genpd)) - return; - - if (genpd->suspended_count != genpd->device_count - || atomic_read(&genpd->sd_count) > 0) - return; - - /* Choose the deepest state when suspending */ - genpd->state_idx = genpd->state_count - 1; - if (_genpd_power_off(genpd, false)) - return; - - genpd->status = GENPD_STATE_OFF; - - list_for_each_entry(link, &genpd->child_links, child_node) { - genpd_sd_counter_dec(link->parent); - - if (use_lock) - genpd_lock_nested(link->parent, depth + 1); - - genpd_sync_power_off(link->parent, use_lock, depth + 1); - - if (use_lock) - genpd_unlock(link->parent); - } -} - -/** - * genpd_sync_power_on - Synchronously power on a PM domain and its parents. - * @genpd: PM domain to power on. - * @use_lock: use the lock. - * @depth: nesting count for lockdep. - * - * This function is only called in "noirq" and "syscore" stages of system power - * transitions. The "noirq" callbacks may be executed asynchronously, thus in - * these cases the lock must be held. - */ -static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, - unsigned int depth) -{ - struct gpd_link *link; - - if (genpd_status_on(genpd)) - return; - - list_for_each_entry(link, &genpd->child_links, child_node) { - genpd_sd_counter_inc(link->parent); - - if (use_lock) - genpd_lock_nested(link->parent, depth + 1); - - genpd_sync_power_on(link->parent, use_lock, depth + 1); - - if (use_lock) - genpd_unlock(link->parent); - } - - _genpd_power_on(genpd, false); - genpd->status = GENPD_STATE_ON; -} - -/** - * genpd_prepare - Start power transition of a device in a PM domain. - * @dev: Device to start the transition of. - * - * Start a power transition of a device (during a system-wide power transition) - * under the assumption that its pm_domain field points to the domain member of - * an object of type struct generic_pm_domain representing a PM domain - * consisting of I/O devices. - */ -static int genpd_prepare(struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - genpd_lock(genpd); - - if (genpd->prepared_count++ == 0) - genpd->suspended_count = 0; - - genpd_unlock(genpd); - - ret = pm_generic_prepare(dev); - if (ret < 0) { - genpd_lock(genpd); - - genpd->prepared_count--; - - genpd_unlock(genpd); - } - - /* Never return 1, as genpd don't cope with the direct_complete path. */ - return ret >= 0 ? 0 : ret; -} - -/** - * genpd_finish_suspend - Completion of suspend or hibernation of device in an - * I/O pm domain. - * @dev: Device to suspend. - * @poweroff: Specifies if this is a poweroff_noirq or suspend_noirq callback. - * - * Stop the device and remove power from the domain if all devices in it have - * been stopped. 
- */ -static int genpd_finish_suspend(struct device *dev, bool poweroff) -{ - struct generic_pm_domain *genpd; - int ret = 0; - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (poweroff) - ret = pm_generic_poweroff_noirq(dev); - else - ret = pm_generic_suspend_noirq(dev); - if (ret) - return ret; - - if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd)) - return 0; - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_stop_dev(genpd, dev); - if (ret) { - if (poweroff) - pm_generic_restore_noirq(dev); - else - pm_generic_resume_noirq(dev); - return ret; - } - } - - genpd_lock(genpd); - genpd->suspended_count++; - genpd_sync_power_off(genpd, true, 0); - genpd_unlock(genpd); - - return 0; -} - -/** - * genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. - * @dev: Device to suspend. - * - * Stop the device and remove power from the domain if all devices in it have - * been stopped. - */ -static int genpd_suspend_noirq(struct device *dev) -{ - dev_dbg(dev, "%s()\n", __func__); - - return genpd_finish_suspend(dev, false); -} - -/** - * genpd_resume_noirq - Start of resume of device in an I/O PM domain. - * @dev: Device to resume. - * - * Restore power to the device's PM domain, if necessary, and start the device. - */ -static int genpd_resume_noirq(struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (device_wakeup_path(dev) && genpd_is_active_wakeup(genpd)) - return pm_generic_resume_noirq(dev); - - genpd_lock(genpd); - genpd_sync_power_on(genpd, true, 0); - genpd->suspended_count--; - genpd_unlock(genpd); - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_start_dev(genpd, dev); - if (ret) - return ret; - } - - return pm_generic_resume_noirq(dev); -} - -/** - * genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain. - * @dev: Device to freeze. - * - * Carry out a late freeze of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int genpd_freeze_noirq(struct device *dev) -{ - const struct generic_pm_domain *genpd; - int ret = 0; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - ret = pm_generic_freeze_noirq(dev); - if (ret) - return ret; - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) - ret = genpd_stop_dev(genpd, dev); - - return ret; -} - -/** - * genpd_thaw_noirq - Early thaw of device in an I/O PM domain. - * @dev: Device to thaw. - * - * Start the device, unless power has been removed from the domain already - * before the system transition. - */ -static int genpd_thaw_noirq(struct device *dev) -{ - const struct generic_pm_domain *genpd; - int ret = 0; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_start_dev(genpd, dev); - if (ret) - return ret; - } - - return pm_generic_thaw_noirq(dev); -} - -/** - * genpd_poweroff_noirq - Completion of hibernation of device in an - * I/O PM domain. - * @dev: Device to poweroff. 
- * - * Stop the device and remove power from the domain if all devices in it have - * been stopped. - */ -static int genpd_poweroff_noirq(struct device *dev) -{ - dev_dbg(dev, "%s()\n", __func__); - - return genpd_finish_suspend(dev, true); -} - -/** - * genpd_restore_noirq - Start of restore of device in an I/O PM domain. - * @dev: Device to resume. - * - * Make sure the domain will be in the same power state as before the - * hibernation the system is resuming from and start the device if necessary. - */ -static int genpd_restore_noirq(struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret = 0; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - /* - * At this point suspended_count == 0 means we are being run for the - * first time for the given domain in the present cycle. - */ - genpd_lock(genpd); - if (genpd->suspended_count++ == 0) { - /* - * The boot kernel might put the domain into arbitrary state, - * so make it appear as powered off to genpd_sync_power_on(), - * so that it tries to power it on in case it was really off. - */ - genpd->status = GENPD_STATE_OFF; - } - - genpd_sync_power_on(genpd, true, 0); - genpd_unlock(genpd); - - if (genpd->dev_ops.stop && genpd->dev_ops.start && - !pm_runtime_status_suspended(dev)) { - ret = genpd_start_dev(genpd, dev); - if (ret) - return ret; - } - - return pm_generic_restore_noirq(dev); -} - -/** - * genpd_complete - Complete power transition of a device in a power domain. - * @dev: Device to complete the transition of. - * - * Complete a power transition of a device (during a system-wide power - * transition) under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static void genpd_complete(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return; - - pm_generic_complete(dev); - - genpd_lock(genpd); - - genpd->prepared_count--; - if (!genpd->prepared_count) - genpd_queue_power_off_work(genpd); - - genpd_unlock(genpd); -} - -static void genpd_switch_state(struct device *dev, bool suspend) -{ - struct generic_pm_domain *genpd; - bool use_lock; - - genpd = dev_to_genpd_safe(dev); - if (!genpd) - return; - - use_lock = genpd_is_irq_safe(genpd); - - if (use_lock) - genpd_lock(genpd); - - if (suspend) { - genpd->suspended_count++; - genpd_sync_power_off(genpd, use_lock, 0); - } else { - genpd_sync_power_on(genpd, use_lock, 0); - genpd->suspended_count--; - } - - if (use_lock) - genpd_unlock(genpd); -} - -/** - * dev_pm_genpd_suspend - Synchronously try to suspend the genpd for @dev - * @dev: The device that is attached to the genpd, that can be suspended. - * - * This routine should typically be called for a device that needs to be - * suspended during the syscore suspend phase. It may also be called during - * suspend-to-idle to suspend a corresponding CPU device that is attached to a - * genpd. - */ -void dev_pm_genpd_suspend(struct device *dev) -{ - genpd_switch_state(dev, true); -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_suspend); - -/** - * dev_pm_genpd_resume - Synchronously try to resume the genpd for @dev - * @dev: The device that is attached to the genpd, which needs to be resumed. - * - * This routine should typically be called for a device that needs to be resumed - * during the syscore resume phase. 
It may also be called during suspend-to-idle - * to resume a corresponding CPU device that is attached to a genpd. - */ -void dev_pm_genpd_resume(struct device *dev) -{ - genpd_switch_state(dev, false); -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_resume); - -#else /* !CONFIG_PM_SLEEP */ - -#define genpd_prepare NULL -#define genpd_suspend_noirq NULL -#define genpd_resume_noirq NULL -#define genpd_freeze_noirq NULL -#define genpd_thaw_noirq NULL -#define genpd_poweroff_noirq NULL -#define genpd_restore_noirq NULL -#define genpd_complete NULL - -#endif /* CONFIG_PM_SLEEP */ - -static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) -{ - struct generic_pm_domain_data *gpd_data; - int ret; - - ret = dev_pm_get_subsys_data(dev); - if (ret) - return ERR_PTR(ret); - - gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); - if (!gpd_data) { - ret = -ENOMEM; - goto err_put; - } - - gpd_data->base.dev = dev; - gpd_data->td.constraint_changed = true; - gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; - gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; - gpd_data->next_wakeup = KTIME_MAX; - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - ret = -EINVAL; - goto err_free; - } - - dev->power.subsys_data->domain_data = &gpd_data->base; - - spin_unlock_irq(&dev->power.lock); - - return gpd_data; - - err_free: - spin_unlock_irq(&dev->power.lock); - kfree(gpd_data); - err_put: - dev_pm_put_subsys_data(dev); - return ERR_PTR(ret); -} - -static void genpd_free_dev_data(struct device *dev, - struct generic_pm_domain_data *gpd_data) -{ - spin_lock_irq(&dev->power.lock); - - dev->power.subsys_data->domain_data = NULL; - - spin_unlock_irq(&dev->power.lock); - - kfree(gpd_data); - dev_pm_put_subsys_data(dev); -} - -static void genpd_update_cpumask(struct generic_pm_domain *genpd, - int cpu, bool set, unsigned int depth) -{ - struct gpd_link *link; - - if (!genpd_is_cpu_domain(genpd)) - return; - - list_for_each_entry(link, &genpd->child_links, child_node) { - struct generic_pm_domain *parent = link->parent; - - genpd_lock_nested(parent, depth + 1); - genpd_update_cpumask(parent, cpu, set, depth + 1); - genpd_unlock(parent); - } - - if (set) - cpumask_set_cpu(cpu, genpd->cpus); - else - cpumask_clear_cpu(cpu, genpd->cpus); -} - -static void genpd_set_cpumask(struct generic_pm_domain *genpd, int cpu) -{ - if (cpu >= 0) - genpd_update_cpumask(genpd, cpu, true, 0); -} - -static void genpd_clear_cpumask(struct generic_pm_domain *genpd, int cpu) -{ - if (cpu >= 0) - genpd_update_cpumask(genpd, cpu, false, 0); -} - -static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) -{ - int cpu; - - if (!genpd_is_cpu_domain(genpd)) - return -1; - - for_each_possible_cpu(cpu) { - if (get_cpu_device(cpu) == dev) - return cpu; - } - - return -1; -} - -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct device *base_dev) -{ - struct generic_pm_domain_data *gpd_data; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) - return -EINVAL; - - gpd_data = genpd_alloc_dev_data(dev); - if (IS_ERR(gpd_data)) - return PTR_ERR(gpd_data); - - gpd_data->cpu = genpd_get_cpu(genpd, base_dev); - - ret = genpd->attach_dev ? 
genpd->attach_dev(genpd, dev) : 0; - if (ret) - goto out; - - genpd_lock(genpd); - - genpd_set_cpumask(genpd, gpd_data->cpu); - dev_pm_domain_set(dev, &genpd->domain); - - genpd->device_count++; - genpd->max_off_time_changed = true; - - list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); - - genpd_unlock(genpd); - out: - if (ret) - genpd_free_dev_data(dev, gpd_data); - else - dev_pm_qos_add_notifier(dev, &gpd_data->nb, - DEV_PM_QOS_RESUME_LATENCY); - - return ret; -} - -/** - * pm_genpd_add_device - Add a device to an I/O PM domain. - * @genpd: PM domain to add the device to. - * @dev: Device to be added. - */ -int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) -{ - int ret; - - mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev, dev); - mutex_unlock(&gpd_list_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_add_device); - -static int genpd_remove_device(struct generic_pm_domain *genpd, - struct device *dev) -{ - struct generic_pm_domain_data *gpd_data; - struct pm_domain_data *pdd; - int ret = 0; - - dev_dbg(dev, "%s()\n", __func__); - - pdd = dev->power.subsys_data->domain_data; - gpd_data = to_gpd_data(pdd); - dev_pm_qos_remove_notifier(dev, &gpd_data->nb, - DEV_PM_QOS_RESUME_LATENCY); - - genpd_lock(genpd); - - if (genpd->prepared_count > 0) { - ret = -EAGAIN; - goto out; - } - - genpd->device_count--; - genpd->max_off_time_changed = true; - - genpd_clear_cpumask(genpd, gpd_data->cpu); - dev_pm_domain_set(dev, NULL); - - list_del_init(&pdd->list_node); - - genpd_unlock(genpd); - - if (genpd->detach_dev) - genpd->detach_dev(genpd, dev); - - genpd_free_dev_data(dev, gpd_data); - - return 0; - - out: - genpd_unlock(genpd); - dev_pm_qos_add_notifier(dev, &gpd_data->nb, DEV_PM_QOS_RESUME_LATENCY); - - return ret; -} - -/** - * pm_genpd_remove_device - Remove a device from an I/O PM domain. - * @dev: Device to be removed. - */ -int pm_genpd_remove_device(struct device *dev) -{ - struct generic_pm_domain *genpd = dev_to_genpd_safe(dev); - - if (!genpd) - return -EINVAL; - - return genpd_remove_device(genpd, dev); -} -EXPORT_SYMBOL_GPL(pm_genpd_remove_device); - -/** - * dev_pm_genpd_add_notifier - Add a genpd power on/off notifier for @dev - * - * @dev: Device that should be associated with the notifier - * @nb: The notifier block to register - * - * Users may call this function to add a genpd power on/off notifier for an - * attached @dev. Only one notifier per device is allowed. The notifier is - * sent when genpd is powering on/off the PM domain. - * - * It is assumed that the user guarantee that the genpd wouldn't be detached - * while this routine is getting called. - * - * Returns 0 on success and negative error values on failures. 
- */ -int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb) -{ - struct generic_pm_domain *genpd; - struct generic_pm_domain_data *gpd_data; - int ret; - - genpd = dev_to_genpd_safe(dev); - if (!genpd) - return -ENODEV; - - if (WARN_ON(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) - return -EINVAL; - - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - if (gpd_data->power_nb) - return -EEXIST; - - genpd_lock(genpd); - ret = raw_notifier_chain_register(&genpd->power_notifiers, nb); - genpd_unlock(genpd); - - if (ret) { - dev_warn(dev, "failed to add notifier for PM domain %s\n", - genpd->name); - return ret; - } - - gpd_data->power_nb = nb; - return 0; -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_add_notifier); - -/** - * dev_pm_genpd_remove_notifier - Remove a genpd power on/off notifier for @dev - * - * @dev: Device that is associated with the notifier - * - * Users may call this function to remove a genpd power on/off notifier for an - * attached @dev. - * - * It is assumed that the user guarantee that the genpd wouldn't be detached - * while this routine is getting called. - * - * Returns 0 on success and negative error values on failures. - */ -int dev_pm_genpd_remove_notifier(struct device *dev) -{ - struct generic_pm_domain *genpd; - struct generic_pm_domain_data *gpd_data; - int ret; - - genpd = dev_to_genpd_safe(dev); - if (!genpd) - return -ENODEV; - - if (WARN_ON(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) - return -EINVAL; - - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - if (!gpd_data->power_nb) - return -ENODEV; - - genpd_lock(genpd); - ret = raw_notifier_chain_unregister(&genpd->power_notifiers, - gpd_data->power_nb); - genpd_unlock(genpd); - - if (ret) { - dev_warn(dev, "failed to remove notifier for PM domain %s\n", - genpd->name); - return ret; - } - - gpd_data->power_nb = NULL; - return 0; -} -EXPORT_SYMBOL_GPL(dev_pm_genpd_remove_notifier); - -static int genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *subdomain) -{ - struct gpd_link *link, *itr; - int ret = 0; - - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain) - || genpd == subdomain) - return -EINVAL; - - /* - * If the domain can be powered on/off in an IRQ safe - * context, ensure that the subdomain can also be - * powered on/off in that context. - */ - if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) { - WARN(1, "Parent %s of subdomain %s must be IRQ safe\n", - genpd->name, subdomain->name); - return -EINVAL; - } - - link = kzalloc(sizeof(*link), GFP_KERNEL); - if (!link) - return -ENOMEM; - - genpd_lock(subdomain); - genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); - - if (!genpd_status_on(genpd) && genpd_status_on(subdomain)) { - ret = -EINVAL; - goto out; - } - - list_for_each_entry(itr, &genpd->parent_links, parent_node) { - if (itr->child == subdomain && itr->parent == genpd) { - ret = -EINVAL; - goto out; - } - } - - link->parent = genpd; - list_add_tail(&link->parent_node, &genpd->parent_links); - link->child = subdomain; - list_add_tail(&link->child_node, &subdomain->child_links); - if (genpd_status_on(subdomain)) - genpd_sd_counter_inc(genpd); - - out: - genpd_unlock(genpd); - genpd_unlock(subdomain); - if (ret) - kfree(link); - return ret; -} - -/** - * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. - * @genpd: Leader PM domain to add the subdomain to. - * @subdomain: Subdomain to be added. 
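An illustrative aside showing how a consumer could register the power on/off notifier described above; the foo_* names and the save/restore placeholders are hypothetical, while the GENPD_NOTIFY_* actions and dev_pm_genpd_add_notifier() come from the code in this file.

#include <linux/notifier.h>
#include <linux/pm_domain.h>

/* Hypothetical consumer: react to its PM domain powering off and on. */
static int foo_genpd_notifier(struct notifier_block *nb, unsigned long action,
			      void *data)
{
	switch (action) {
	case GENPD_NOTIFY_PRE_OFF:
		/* Save context before the domain loses power. */
		break;
	case GENPD_NOTIFY_ON:
		/* Restore context once the domain is powered again. */
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block foo_genpd_nb = {
	.notifier_call = foo_genpd_notifier,
};

/* Called from probe, after the device has been attached to its genpd. */
static int foo_register_pd_notifier(struct device *dev)
{
	return dev_pm_genpd_add_notifier(dev, &foo_genpd_nb);
}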
- */ -int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *subdomain) -{ - int ret; - - mutex_lock(&gpd_list_lock); - ret = genpd_add_subdomain(genpd, subdomain); - mutex_unlock(&gpd_list_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); - -/** - * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. - * @genpd: Leader PM domain to remove the subdomain from. - * @subdomain: Subdomain to be removed. - */ -int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *subdomain) -{ - struct gpd_link *l, *link; - int ret = -EINVAL; - - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) - return -EINVAL; - - genpd_lock(subdomain); - genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); - - if (!list_empty(&subdomain->parent_links) || subdomain->device_count) { - pr_warn("%s: unable to remove subdomain %s\n", - genpd->name, subdomain->name); - ret = -EBUSY; - goto out; - } - - list_for_each_entry_safe(link, l, &genpd->parent_links, parent_node) { - if (link->child != subdomain) - continue; - - list_del(&link->parent_node); - list_del(&link->child_node); - kfree(link); - if (genpd_status_on(subdomain)) - genpd_sd_counter_dec(genpd); - - ret = 0; - break; - } - -out: - genpd_unlock(genpd); - genpd_unlock(subdomain); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); - -static void genpd_free_default_power_state(struct genpd_power_state *states, - unsigned int state_count) -{ - kfree(states); -} - -static int genpd_set_default_power_state(struct generic_pm_domain *genpd) -{ - struct genpd_power_state *state; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return -ENOMEM; - - genpd->states = state; - genpd->state_count = 1; - genpd->free_states = genpd_free_default_power_state; - - return 0; -} - -static void genpd_lock_init(struct generic_pm_domain *genpd) -{ - if (genpd->flags & GENPD_FLAG_IRQ_SAFE) { - spin_lock_init(&genpd->slock); - genpd->lock_ops = &genpd_spin_ops; - } else { - mutex_init(&genpd->mlock); - genpd->lock_ops = &genpd_mtx_ops; - } -} - -/** - * pm_genpd_init - Initialize a generic I/O PM domain object. - * @genpd: PM domain object to initialize. - * @gov: PM domain governor to associate with the domain (may be NULL). - * @is_off: Initial value of the domain's power_is_off field. - * - * Returns 0 on successful initialization, else a negative error code. - */ -int pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) -{ - int ret; - - if (IS_ERR_OR_NULL(genpd)) - return -EINVAL; - - INIT_LIST_HEAD(&genpd->parent_links); - INIT_LIST_HEAD(&genpd->child_links); - INIT_LIST_HEAD(&genpd->dev_list); - RAW_INIT_NOTIFIER_HEAD(&genpd->power_notifiers); - genpd_lock_init(genpd); - genpd->gov = gov; - INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); - atomic_set(&genpd->sd_count, 0); - genpd->status = is_off ? 
GENPD_STATE_OFF : GENPD_STATE_ON; - genpd->device_count = 0; - genpd->max_off_time_ns = -1; - genpd->max_off_time_changed = true; - genpd->provider = NULL; - genpd->has_provider = false; - genpd->accounting_time = ktime_get(); - genpd->domain.ops.runtime_suspend = genpd_runtime_suspend; - genpd->domain.ops.runtime_resume = genpd_runtime_resume; - genpd->domain.ops.prepare = genpd_prepare; - genpd->domain.ops.suspend_noirq = genpd_suspend_noirq; - genpd->domain.ops.resume_noirq = genpd_resume_noirq; - genpd->domain.ops.freeze_noirq = genpd_freeze_noirq; - genpd->domain.ops.thaw_noirq = genpd_thaw_noirq; - genpd->domain.ops.poweroff_noirq = genpd_poweroff_noirq; - genpd->domain.ops.restore_noirq = genpd_restore_noirq; - genpd->domain.ops.complete = genpd_complete; - genpd->domain.start = genpd_dev_pm_start; - - if (genpd->flags & GENPD_FLAG_PM_CLK) { - genpd->dev_ops.stop = pm_clk_suspend; - genpd->dev_ops.start = pm_clk_resume; - } - - /* Always-on domains must be powered on at initialization. */ - if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) && - !genpd_status_on(genpd)) - return -EINVAL; - - if (genpd_is_cpu_domain(genpd) && - !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) - return -ENOMEM; - - /* Use only one "off" state if there were no states declared */ - if (genpd->state_count == 0) { - ret = genpd_set_default_power_state(genpd); - if (ret) { - if (genpd_is_cpu_domain(genpd)) - free_cpumask_var(genpd->cpus); - return ret; - } - } else if (!gov && genpd->state_count > 1) { - pr_warn("%s: no governor for states\n", genpd->name); - } - - device_initialize(&genpd->dev); - dev_set_name(&genpd->dev, "%s", genpd->name); - - mutex_lock(&gpd_list_lock); - list_add(&genpd->gpd_list_node, &gpd_list); - mutex_unlock(&gpd_list_lock); - genpd_debug_add(genpd); - - return 0; -} -EXPORT_SYMBOL_GPL(pm_genpd_init); - -static int genpd_remove(struct generic_pm_domain *genpd) -{ - struct gpd_link *l, *link; - - if (IS_ERR_OR_NULL(genpd)) - return -EINVAL; - - genpd_lock(genpd); - - if (genpd->has_provider) { - genpd_unlock(genpd); - pr_err("Provider present, unable to remove %s\n", genpd->name); - return -EBUSY; - } - - if (!list_empty(&genpd->parent_links) || genpd->device_count) { - genpd_unlock(genpd); - pr_err("%s: unable to remove %s\n", __func__, genpd->name); - return -EBUSY; - } - - list_for_each_entry_safe(link, l, &genpd->child_links, child_node) { - list_del(&link->parent_node); - list_del(&link->child_node); - kfree(link); - } - - genpd_debug_remove(genpd); - list_del(&genpd->gpd_list_node); - genpd_unlock(genpd); - cancel_work_sync(&genpd->power_off_work); - if (genpd_is_cpu_domain(genpd)) - free_cpumask_var(genpd->cpus); - if (genpd->free_states) - genpd->free_states(genpd->states, genpd->state_count); - - pr_debug("%s: removed %s\n", __func__, genpd->name); - - return 0; -} - -/** - * pm_genpd_remove - Remove a generic I/O PM domain - * @genpd: Pointer to PM domain that is to be removed. - * - * To remove the PM domain, this function: - * - Removes the PM domain as a subdomain to any parent domains, - * if it was added. - * - Removes the PM domain from the list of registered PM domains. - * - * The PM domain will only be removed, if the associated provider has - * been removed, it is not a parent to any other PM domain and has no - * devices associated with it. 
- */ -int pm_genpd_remove(struct generic_pm_domain *genpd) -{ - int ret; - - mutex_lock(&gpd_list_lock); - ret = genpd_remove(genpd); - mutex_unlock(&gpd_list_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_remove); - -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF - -/* - * Device Tree based PM domain providers. - * - * The code below implements generic device tree based PM domain providers that - * bind device tree nodes with generic PM domains registered in the system. - * - * Any driver that registers generic PM domains and needs to support binding of - * devices to these domains is supposed to register a PM domain provider, which - * maps a PM domain specifier retrieved from the device tree to a PM domain. - * - * Two simple mapping functions have been provided for convenience: - * - genpd_xlate_simple() for 1:1 device tree node to PM domain mapping. - * - genpd_xlate_onecell() for mapping of multiple PM domains per node by - * index. - */ - -/** - * struct of_genpd_provider - PM domain provider registration structure - * @link: Entry in global list of PM domain providers - * @node: Pointer to device tree node of PM domain provider - * @xlate: Provider-specific xlate callback mapping a set of specifier cells - * into a PM domain. - * @data: context pointer to be passed into @xlate callback - */ -struct of_genpd_provider { - struct list_head link; - struct device_node *node; - genpd_xlate_t xlate; - void *data; -}; - -/* List of registered PM domain providers. */ -static LIST_HEAD(of_genpd_providers); -/* Mutex to protect the list above. */ -static DEFINE_MUTEX(of_genpd_mutex); - -/** - * genpd_xlate_simple() - Xlate function for direct node-domain mapping - * @genpdspec: OF phandle args to map into a PM domain - * @data: xlate function private data - pointer to struct generic_pm_domain - * - * This is a generic xlate function that can be used to model PM domains that - * have their own device tree nodes. The private data of xlate function needs - * to be a valid pointer to struct generic_pm_domain. - */ -static struct generic_pm_domain *genpd_xlate_simple( - struct of_phandle_args *genpdspec, - void *data) -{ - return data; -} - -/** - * genpd_xlate_onecell() - Xlate function using a single index. - * @genpdspec: OF phandle args to map into a PM domain - * @data: xlate function private data - pointer to struct genpd_onecell_data - * - * This is a generic xlate function that can be used to model simple PM domain - * controllers that have one device tree node and provide multiple PM domains. - * A single cell is used as an index into an array of PM domains specified in - * the genpd_onecell_data struct when registering the provider. - */ -static struct generic_pm_domain *genpd_xlate_onecell( - struct of_phandle_args *genpdspec, - void *data) -{ - struct genpd_onecell_data *genpd_data = data; - unsigned int idx = genpdspec->args[0]; - - if (genpdspec->args_count != 1) - return ERR_PTR(-EINVAL); - - if (idx >= genpd_data->num_domains) { - pr_err("%s: invalid domain index %u\n", __func__, idx); - return ERR_PTR(-EINVAL); - } - - if (!genpd_data->domains[idx]) - return ERR_PTR(-ENOENT); - - return genpd_data->domains[idx]; -} - -/** - * genpd_add_provider() - Register a PM domain provider for a node - * @np: Device node pointer associated with the PM domain provider. - * @xlate: Callback for decoding PM domain from phandle arguments. - * @data: Context pointer for @xlate callback. 
- */ -static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, - void *data) -{ - struct of_genpd_provider *cp; - - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) - return -ENOMEM; - - cp->node = of_node_get(np); - cp->data = data; - cp->xlate = xlate; - fwnode_dev_initialized(&np->fwnode, true); - - mutex_lock(&of_genpd_mutex); - list_add(&cp->link, &of_genpd_providers); - mutex_unlock(&of_genpd_mutex); - pr_debug("Added domain provider from %pOF\n", np); - - return 0; -} - -static bool genpd_present(const struct generic_pm_domain *genpd) -{ - bool ret = false; - const struct generic_pm_domain *gpd; - - mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - if (gpd == genpd) { - ret = true; - break; - } - } - mutex_unlock(&gpd_list_lock); - - return ret; -} - -/** - * of_genpd_add_provider_simple() - Register a simple PM domain provider - * @np: Device node pointer associated with the PM domain provider. - * @genpd: Pointer to PM domain associated with the PM domain provider. - */ -int of_genpd_add_provider_simple(struct device_node *np, - struct generic_pm_domain *genpd) -{ - int ret; - - if (!np || !genpd) - return -EINVAL; - - if (!genpd_present(genpd)) - return -EINVAL; - - genpd->dev.of_node = np; - - /* Parse genpd OPP table */ - if (genpd->set_performance_state) { - ret = dev_pm_opp_of_add_table(&genpd->dev); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table: %d\n", - ret); - return ret; - } - - /* - * Save table for faster processing while setting performance - * state. - */ - genpd->opp_table = dev_pm_opp_get_opp_table(&genpd->dev); - WARN_ON(IS_ERR(genpd->opp_table)); - } - - ret = genpd_add_provider(np, genpd_xlate_simple, genpd); - if (ret) { - if (genpd->set_performance_state) { - dev_pm_opp_put_opp_table(genpd->opp_table); - dev_pm_opp_of_remove_table(&genpd->dev); - } - - return ret; - } - - genpd->provider = &np->fwnode; - genpd->has_provider = true; - - return 0; -} -EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple); - -/** - * of_genpd_add_provider_onecell() - Register a onecell PM domain provider - * @np: Device node pointer associated with the PM domain provider. - * @data: Pointer to the data associated with the PM domain provider. - */ -int of_genpd_add_provider_onecell(struct device_node *np, - struct genpd_onecell_data *data) -{ - struct generic_pm_domain *genpd; - unsigned int i; - int ret = -EINVAL; - - if (!np || !data) - return -EINVAL; - - if (!data->xlate) - data->xlate = genpd_xlate_onecell; - - for (i = 0; i < data->num_domains; i++) { - genpd = data->domains[i]; - - if (!genpd) - continue; - if (!genpd_present(genpd)) - goto error; - - genpd->dev.of_node = np; - - /* Parse genpd OPP table */ - if (genpd->set_performance_state) { - ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", - i, ret); - goto error; - } - - /* - * Save table for faster processing while setting - * performance state. 
- */ - genpd->opp_table = dev_pm_opp_get_opp_table(&genpd->dev); - WARN_ON(IS_ERR(genpd->opp_table)); - } - - genpd->provider = &np->fwnode; - genpd->has_provider = true; - } - - ret = genpd_add_provider(np, data->xlate, data); - if (ret < 0) - goto error; - - return 0; - -error: - while (i--) { - genpd = data->domains[i]; - - if (!genpd) - continue; - - genpd->provider = NULL; - genpd->has_provider = false; - - if (genpd->set_performance_state) { - dev_pm_opp_put_opp_table(genpd->opp_table); - dev_pm_opp_of_remove_table(&genpd->dev); - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell); - -/** - * of_genpd_del_provider() - Remove a previously registered PM domain provider - * @np: Device node pointer associated with the PM domain provider - */ -void of_genpd_del_provider(struct device_node *np) -{ - struct of_genpd_provider *cp, *tmp; - struct generic_pm_domain *gpd; - - mutex_lock(&gpd_list_lock); - mutex_lock(&of_genpd_mutex); - list_for_each_entry_safe(cp, tmp, &of_genpd_providers, link) { - if (cp->node == np) { - /* - * For each PM domain associated with the - * provider, set the 'has_provider' to false - * so that the PM domain can be safely removed. - */ - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - if (gpd->provider == &np->fwnode) { - gpd->has_provider = false; - - if (!gpd->set_performance_state) - continue; - - dev_pm_opp_put_opp_table(gpd->opp_table); - dev_pm_opp_of_remove_table(&gpd->dev); - } - } - - fwnode_dev_initialized(&cp->node->fwnode, false); - list_del(&cp->link); - of_node_put(cp->node); - kfree(cp); - break; - } - } - mutex_unlock(&of_genpd_mutex); - mutex_unlock(&gpd_list_lock); -} -EXPORT_SYMBOL_GPL(of_genpd_del_provider); - -/** - * genpd_get_from_provider() - Look-up PM domain - * @genpdspec: OF phandle args to use for look-up - * - * Looks for a PM domain provider under the node specified by @genpdspec and if - * found, uses xlate function of the provider to map phandle args to a PM - * domain. - * - * Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR() - * on failure. - */ -static struct generic_pm_domain *genpd_get_from_provider( - struct of_phandle_args *genpdspec) -{ - struct generic_pm_domain *genpd = ERR_PTR(-ENOENT); - struct of_genpd_provider *provider; - - if (!genpdspec) - return ERR_PTR(-EINVAL); - - mutex_lock(&of_genpd_mutex); - - /* Check if we have such a provider in our array */ - list_for_each_entry(provider, &of_genpd_providers, link) { - if (provider->node == genpdspec->np) - genpd = provider->xlate(genpdspec, provider->data); - if (!IS_ERR(genpd)) - break; - } - - mutex_unlock(&of_genpd_mutex); - - return genpd; -} - -/** - * of_genpd_add_device() - Add a device to an I/O PM domain - * @genpdspec: OF phandle args to use for look-up PM domain - * @dev: Device to be added. - * - * Looks-up an I/O PM domain based upon phandle args provided and adds - * the device to the PM domain. Returns a negative error code on failure. - */ -int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret; - - mutex_lock(&gpd_list_lock); - - genpd = genpd_get_from_provider(genpdspec); - if (IS_ERR(genpd)) { - ret = PTR_ERR(genpd); - goto out; - } - - ret = genpd_add_device(genpd, dev, dev); - -out: - mutex_unlock(&gpd_list_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(of_genpd_add_device); - -/** - * of_genpd_add_subdomain - Add a subdomain to an I/O PM domain. 
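Putting the registration calls above together, a hypothetical onecell provider might register and unregister itself as sketched below. The domain count and foo_* names are assumptions, and the individual domains are expected to have been initialized with pm_genpd_init() beforehand. Leaving .xlate NULL selects genpd_xlate_onecell(); a custom callback such as the two-cell one sketched earlier could be assigned there instead.

static struct generic_pm_domain *foo_domains[8];	/* assumed count */

static struct genpd_onecell_data foo_onecell_data = {
	.domains	= foo_domains,
	.num_domains	= ARRAY_SIZE(foo_domains),
	/* .xlate == NULL: fall back to genpd_xlate_onecell() */
};

static int foo_register_provider(struct device_node *np)
{
	return of_genpd_add_provider_onecell(np, &foo_onecell_data);
}

static void foo_unregister_provider(struct device_node *np)
{
	/* Clears has_provider on the domains so they can be removed later. */
	of_genpd_del_provider(np);
}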
- * @parent_spec: OF phandle args to use for parent PM domain look-up - * @subdomain_spec: OF phandle args to use for subdomain look-up - * - * Looks-up a parent PM domain and subdomain based upon phandle args - * provided and adds the subdomain to the parent PM domain. Returns a - * negative error code on failure. - */ -int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) -{ - struct generic_pm_domain *parent, *subdomain; - int ret; - - mutex_lock(&gpd_list_lock); - - parent = genpd_get_from_provider(parent_spec); - if (IS_ERR(parent)) { - ret = PTR_ERR(parent); - goto out; - } - - subdomain = genpd_get_from_provider(subdomain_spec); - if (IS_ERR(subdomain)) { - ret = PTR_ERR(subdomain); - goto out; - } - - ret = genpd_add_subdomain(parent, subdomain); - -out: - mutex_unlock(&gpd_list_lock); - - return ret == -ENOENT ? -EPROBE_DEFER : ret; -} -EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); - -/** - * of_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. - * @parent_spec: OF phandle args to use for parent PM domain look-up - * @subdomain_spec: OF phandle args to use for subdomain look-up - * - * Looks-up a parent PM domain and subdomain based upon phandle args - * provided and removes the subdomain from the parent PM domain. Returns a - * negative error code on failure. - */ -int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) -{ - struct generic_pm_domain *parent, *subdomain; - int ret; - - mutex_lock(&gpd_list_lock); - - parent = genpd_get_from_provider(parent_spec); - if (IS_ERR(parent)) { - ret = PTR_ERR(parent); - goto out; - } - - subdomain = genpd_get_from_provider(subdomain_spec); - if (IS_ERR(subdomain)) { - ret = PTR_ERR(subdomain); - goto out; - } - - ret = pm_genpd_remove_subdomain(parent, subdomain); - -out: - mutex_unlock(&gpd_list_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(of_genpd_remove_subdomain); - -/** - * of_genpd_remove_last - Remove the last PM domain registered for a provider - * @np: Pointer to device node associated with provider - * - * Find the last PM domain that was added by a particular provider and - * remove this PM domain from the list of PM domains. The provider is - * identified by the 'provider' device structure that is passed. The PM - * domain will only be removed, if the provider associated with domain - * has been removed. - * - * Returns a valid pointer to struct generic_pm_domain on success or - * ERR_PTR() on failure. - */ -struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) -{ - struct generic_pm_domain *gpd, *tmp, *genpd = ERR_PTR(-ENOENT); - int ret; - - if (IS_ERR_OR_NULL(np)) - return ERR_PTR(-EINVAL); - - mutex_lock(&gpd_list_lock); - list_for_each_entry_safe(gpd, tmp, &gpd_list, gpd_list_node) { - if (gpd->provider == &np->fwnode) { - ret = genpd_remove(gpd); - genpd = ret ? ERR_PTR(ret) : gpd; - break; - } - } - mutex_unlock(&gpd_list_lock); - - return genpd; -} -EXPORT_SYMBOL_GPL(of_genpd_remove_last); - -static void genpd_release_dev(struct device *dev) -{ - of_node_put(dev->of_node); - kfree(dev); -} - -static struct bus_type genpd_bus_type = { - .name = "genpd", -}; - -/** - * genpd_dev_pm_detach - Detach a device from its PM domain. - * @dev: Device to detach. - * @power_off: Currently not used - * - * Try to locate a corresponding generic PM domain, which the device was - * attached to previously. If such is found, the device is detached from it. 
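of_genpd_add_subdomain() is normally called by a provider that is itself supplied by a parent domain described in its DT node. A hypothetical helper doing that wiring might look like the sketch below; the property names follow the generic power-domains binding, the foo_* name and the manual construction of the child specifier are assumptions, and error handling is reduced to the essentials.

static int foo_link_below_parent(struct device_node *child_np)
{
	struct of_phandle_args parent_spec, child_spec;
	int ret;

	/* Parent domain referenced from the child provider's node. */
	ret = of_parse_phandle_with_args(child_np, "power-domains",
					 "#power-domain-cells", 0, &parent_spec);
	if (ret)
		return ret;

	/* The child provider itself, domain index 0 of its own cells. */
	child_spec.np = child_np;
	child_spec.args_count = 1;
	child_spec.args[0] = 0;

	ret = of_genpd_add_subdomain(&parent_spec, &child_spec);
	of_node_put(parent_spec.np);

	/* -ENOENT has already been turned into -EPROBE_DEFER by the callee. */
	return ret;
}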
- */ -static void genpd_dev_pm_detach(struct device *dev, bool power_off) -{ - struct generic_pm_domain *pd; - unsigned int i; - int ret = 0; - - pd = dev_to_genpd(dev); - if (IS_ERR(pd)) - return; - - dev_dbg(dev, "removing from PM domain %s\n", pd->name); - - for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { - ret = genpd_remove_device(pd, dev); - if (ret != -EAGAIN) - break; - - mdelay(i); - cond_resched(); - } - - if (ret < 0) { - dev_err(dev, "failed to remove from PM domain %s: %d", - pd->name, ret); - return; - } - - /* Check if PM domain can be powered off after removing this device. */ - genpd_queue_power_off_work(pd); - - /* Unregister the device if it was created by genpd. */ - if (dev->bus == &genpd_bus_type) - device_unregister(dev); -} - -static void genpd_dev_pm_sync(struct device *dev) -{ - struct generic_pm_domain *pd; - - pd = dev_to_genpd(dev); - if (IS_ERR(pd)) - return; - - genpd_queue_power_off_work(pd); -} - -static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, - unsigned int index, bool power_on) -{ - struct of_phandle_args pd_args; - struct generic_pm_domain *pd; - int ret; - - ret = of_parse_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells", index, &pd_args); - if (ret < 0) - return ret; - - mutex_lock(&gpd_list_lock); - pd = genpd_get_from_provider(&pd_args); - of_node_put(pd_args.np); - if (IS_ERR(pd)) { - mutex_unlock(&gpd_list_lock); - dev_dbg(dev, "%s() failed to find PM domain: %ld\n", - __func__, PTR_ERR(pd)); - return driver_deferred_probe_check_state(base_dev); - } - - dev_dbg(dev, "adding to PM domain %s\n", pd->name); - - ret = genpd_add_device(pd, dev, base_dev); - mutex_unlock(&gpd_list_lock); - - if (ret < 0) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to add to PM domain %s: %d", - pd->name, ret); - return ret; - } - - dev->pm_domain->detach = genpd_dev_pm_detach; - dev->pm_domain->sync = genpd_dev_pm_sync; - - if (power_on) { - genpd_lock(pd); - ret = genpd_power_on(pd, 0); - genpd_unlock(pd); - } - - if (ret) - genpd_remove_device(pd, dev); - - return ret ? -EPROBE_DEFER : 1; -} - -/** - * genpd_dev_pm_attach - Attach a device to its PM domain using DT. - * @dev: Device to attach. - * - * Parse device's OF node to find a PM domain specifier. If such is found, - * attaches the device to retrieved pm_domain ops. - * - * Returns 1 on successfully attached PM domain, 0 when the device don't need a - * PM domain or when multiple power-domains exists for it, else a negative error - * code. Note that if a power-domain exists for the device, but it cannot be - * found or turned on, then return -EPROBE_DEFER to ensure that the device is - * not probed and to re-try again later. - */ -int genpd_dev_pm_attach(struct device *dev) -{ - if (!dev->of_node) - return 0; - - /* - * Devices with multiple PM domains must be attached separately, as we - * can only attach one PM domain per device. - */ - if (of_count_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells") != 1) - return 0; - - return __genpd_dev_pm_attach(dev, dev, 0, true); -} -EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); - -/** - * genpd_dev_pm_attach_by_id - Associate a device with one of its PM domains. - * @dev: The device used to lookup the PM domain. - * @index: The index of the PM domain. - * - * Parse device's OF node to find a PM domain specifier at the provided @index. - * If such is found, creates a virtual device and attaches it to the retrieved - * pm_domain ops. 
To deal with detaching of the virtual device, the ->detach() - * callback in the struct dev_pm_domain are assigned to genpd_dev_pm_detach(). - * - * Returns the created virtual device if successfully attached PM domain, NULL - * when the device don't need a PM domain, else an ERR_PTR() in case of - * failures. If a power-domain exists for the device, but cannot be found or - * turned on, then ERR_PTR(-EPROBE_DEFER) is returned to ensure that the device - * is not probed and to re-try again later. - */ -struct device *genpd_dev_pm_attach_by_id(struct device *dev, - unsigned int index) -{ - struct device *virt_dev; - int num_domains; - int ret; - - if (!dev->of_node) - return NULL; - - /* Verify that the index is within a valid range. */ - num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells"); - if (index >= num_domains) - return NULL; - - /* Allocate and register device on the genpd bus. */ - virt_dev = kzalloc(sizeof(*virt_dev), GFP_KERNEL); - if (!virt_dev) - return ERR_PTR(-ENOMEM); - - dev_set_name(virt_dev, "genpd:%u:%s", index, dev_name(dev)); - virt_dev->bus = &genpd_bus_type; - virt_dev->release = genpd_release_dev; - virt_dev->of_node = of_node_get(dev->of_node); - - ret = device_register(virt_dev); - if (ret) { - put_device(virt_dev); - return ERR_PTR(ret); - } - - /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, dev, index, false); - if (ret < 1) { - device_unregister(virt_dev); - return ret ? ERR_PTR(ret) : NULL; - } - - pm_runtime_enable(virt_dev); - genpd_queue_power_off_work(dev_to_genpd(virt_dev)); - - return virt_dev; -} -EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id); - -/** - * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains. - * @dev: The device used to lookup the PM domain. - * @name: The name of the PM domain. - * - * Parse device's OF node to find a PM domain specifier using the - * power-domain-names DT property. For further description see - * genpd_dev_pm_attach_by_id(). 
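Consumers with more than one entry in power-domains cannot rely on the automatic single-domain attach, so they pick up each domain explicitly, usually through the dev_pm_domain_attach_by_name() wrapper that ends up in the helper defined just below. A hypothetical consumer attaching one of its domains could look like this sketch; struct foo_ctx, the domain name "mem" and the link flags are assumptions.

struct foo_ctx {
	struct device *mem_pd;		/* virtual device returned by the attach */
	struct device_link *mem_link;
};

static int foo_attach_mem_domain(struct device *dev, struct foo_ctx *ctx)
{
	ctx->mem_pd = dev_pm_domain_attach_by_name(dev, "mem");
	if (IS_ERR(ctx->mem_pd))
		return PTR_ERR(ctx->mem_pd);
	if (!ctx->mem_pd)
		return -ENODEV;		/* no "mem" domain described in DT */

	/* Tie the domain's runtime PM state to the consumer device. */
	ctx->mem_link = device_link_add(dev, ctx->mem_pd,
					DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
	if (!ctx->mem_link) {
		dev_pm_domain_detach(ctx->mem_pd, true);
		return -EINVAL;
	}

	return 0;
}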
- */ -struct device *genpd_dev_pm_attach_by_name(struct device *dev, const char *name) -{ - int index; - - if (!dev->of_node) - return NULL; - - index = of_property_match_string(dev->of_node, "power-domain-names", - name); - if (index < 0) - return NULL; - - return genpd_dev_pm_attach_by_id(dev, index); -} - -static const struct of_device_id idle_state_match[] = { - { .compatible = "domain-idle-state", }, - { } -}; - -static int genpd_parse_state(struct genpd_power_state *genpd_state, - struct device_node *state_node) -{ - int err; - u32 residency; - u32 entry_latency, exit_latency; - - err = of_property_read_u32(state_node, "entry-latency-us", - &entry_latency); - if (err) { - pr_debug(" * %pOF missing entry-latency-us property\n", - state_node); - return -EINVAL; - } - - err = of_property_read_u32(state_node, "exit-latency-us", - &exit_latency); - if (err) { - pr_debug(" * %pOF missing exit-latency-us property\n", - state_node); - return -EINVAL; - } - - err = of_property_read_u32(state_node, "min-residency-us", &residency); - if (!err) - genpd_state->residency_ns = 1000 * residency; - - genpd_state->power_on_latency_ns = 1000 * exit_latency; - genpd_state->power_off_latency_ns = 1000 * entry_latency; - genpd_state->fwnode = &state_node->fwnode; - - return 0; -} - -static int genpd_iterate_idle_states(struct device_node *dn, - struct genpd_power_state *states) -{ - int ret; - struct of_phandle_iterator it; - struct device_node *np; - int i = 0; - - ret = of_count_phandle_with_args(dn, "domain-idle-states", NULL); - if (ret <= 0) - return ret == -ENOENT ? 0 : ret; - - /* Loop over the phandles until all the requested entry is found */ - of_for_each_phandle(&it, ret, dn, "domain-idle-states", NULL, 0) { - np = it.node; - if (!of_match_node(idle_state_match, np)) - continue; - if (states) { - ret = genpd_parse_state(&states[i], np); - if (ret) { - pr_err("Parsing idle state node %pOF failed with err %d\n", - np, ret); - of_node_put(np); - return ret; - } - } - i++; - } - - return i; -} - -/** - * of_genpd_parse_idle_states: Return array of idle states for the genpd. - * - * @dn: The genpd device node - * @states: The pointer to which the state array will be saved. - * @n: The count of elements in the array returned from this function. - * - * Returns the device states parsed from the OF node. The memory for the states - * is allocated by this function and is the responsibility of the caller to - * free the memory after use. If any or zero compatible domain idle states is - * found it returns 0 and in case of errors, a negative error code is returned. - */ -int of_genpd_parse_idle_states(struct device_node *dn, - struct genpd_power_state **states, int *n) -{ - struct genpd_power_state *st; - int ret; - - ret = genpd_iterate_idle_states(dn, NULL); - if (ret < 0) - return ret; - - if (!ret) { - *states = NULL; - *n = 0; - return 0; - } - - st = kcalloc(ret, sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - ret = genpd_iterate_idle_states(dn, st); - if (ret <= 0) { - kfree(st); - return ret < 0 ? ret : -EINVAL; - } - - *states = st; - *n = ret; - - return 0; -} -EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); - -/** - * pm_genpd_opp_to_performance_state - Gets performance state of the genpd from its OPP node. - * - * @genpd_dev: Genpd's device for which the performance-state needs to be found. - * @opp: struct dev_pm_opp of the OPP for which we need to find performance - * state. - * - * Returns performance state encoded in the OPP of the genpd. 
This calls - * platform specific genpd->opp_to_performance_state() callback to translate - * power domain OPP to performance state. - * - * Returns performance state on success and 0 on failure. - */ -unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, - struct dev_pm_opp *opp) -{ - struct generic_pm_domain *genpd = NULL; - int state; - - genpd = container_of(genpd_dev, struct generic_pm_domain, dev); - - if (unlikely(!genpd->opp_to_performance_state)) - return 0; - - genpd_lock(genpd); - state = genpd->opp_to_performance_state(genpd, opp); - genpd_unlock(genpd); - - return state; -} -EXPORT_SYMBOL_GPL(pm_genpd_opp_to_performance_state); - -static int __init genpd_bus_init(void) -{ - return bus_register(&genpd_bus_type); -} -core_initcall(genpd_bus_init); - -#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ - - -/*** debugfs support ***/ - -#ifdef CONFIG_DEBUG_FS -/* - * TODO: This function is a slightly modified version of rtpm_status_show - * from sysfs.c, so generalize it. - */ -static void rtpm_status_str(struct seq_file *s, struct device *dev) -{ - static const char * const status_lookup[] = { - [RPM_ACTIVE] = "active", - [RPM_RESUMING] = "resuming", - [RPM_SUSPENDED] = "suspended", - [RPM_SUSPENDING] = "suspending" - }; - const char *p = ""; - - if (dev->power.runtime_error) - p = "error"; - else if (dev->power.disable_depth) - p = "unsupported"; - else if (dev->power.runtime_status < ARRAY_SIZE(status_lookup)) - p = status_lookup[dev->power.runtime_status]; - else - WARN_ON(1); - - seq_printf(s, "%-25s ", p); -} - -static void perf_status_str(struct seq_file *s, struct device *dev) -{ - struct generic_pm_domain_data *gpd_data; - - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - seq_put_decimal_ull(s, "", gpd_data->performance_state); -} - -static int genpd_summary_one(struct seq_file *s, - struct generic_pm_domain *genpd) -{ - static const char * const status_lookup[] = { - [GENPD_STATE_ON] = "on", - [GENPD_STATE_OFF] = "off" - }; - struct pm_domain_data *pm_data; - const char *kobj_path; - struct gpd_link *link; - char state[16]; - int ret; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) - goto exit; - if (!genpd_status_on(genpd)) - snprintf(state, sizeof(state), "%s-%u", - status_lookup[genpd->status], genpd->state_idx); - else - snprintf(state, sizeof(state), "%s", - status_lookup[genpd->status]); - seq_printf(s, "%-30s %-50s %u", genpd->name, state, genpd->performance_state); - - /* - * Modifications on the list require holding locks on both - * parent and child, so we are safe. - * Also genpd->name is immutable. - */ - list_for_each_entry(link, &genpd->parent_links, parent_node) { - if (list_is_first(&link->parent_node, &genpd->parent_links)) - seq_printf(s, "\n%48s", " "); - seq_printf(s, "%s", link->child->name); - if (!list_is_last(&link->parent_node, &genpd->parent_links)) - seq_puts(s, ", "); - } - - list_for_each_entry(pm_data, &genpd->dev_list, list_node) { - kobj_path = kobject_get_path(&pm_data->dev->kobj, - genpd_is_irq_safe(genpd) ? 
- GFP_ATOMIC : GFP_KERNEL); - if (kobj_path == NULL) - continue; - - seq_printf(s, "\n %-50s ", kobj_path); - rtpm_status_str(s, pm_data->dev); - perf_status_str(s, pm_data->dev); - kfree(kobj_path); - } - - seq_puts(s, "\n"); -exit: - genpd_unlock(genpd); - - return 0; -} - -static int summary_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd; - int ret = 0; - - seq_puts(s, "domain status children performance\n"); - seq_puts(s, " /device runtime status\n"); - seq_puts(s, "----------------------------------------------------------------------------------------------\n"); - - ret = mutex_lock_interruptible(&gpd_list_lock); - if (ret) - return -ERESTARTSYS; - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) { - ret = genpd_summary_one(s, genpd); - if (ret) - break; - } - mutex_unlock(&gpd_list_lock); - - return ret; -} - -static int status_show(struct seq_file *s, void *data) -{ - static const char * const status_lookup[] = { - [GENPD_STATE_ON] = "on", - [GENPD_STATE_OFF] = "off" - }; - - struct generic_pm_domain *genpd = s->private; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - if (WARN_ON_ONCE(genpd->status >= ARRAY_SIZE(status_lookup))) - goto exit; - - if (genpd->status == GENPD_STATE_OFF) - seq_printf(s, "%s-%u\n", status_lookup[genpd->status], - genpd->state_idx); - else - seq_printf(s, "%s\n", status_lookup[genpd->status]); -exit: - genpd_unlock(genpd); - return ret; -} - -static int sub_domains_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - struct gpd_link *link; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - list_for_each_entry(link, &genpd->parent_links, parent_node) - seq_printf(s, "%s\n", link->child->name); - - genpd_unlock(genpd); - return ret; -} - -static int idle_states_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - unsigned int i; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - seq_puts(s, "State Time Spent(ms) Usage Rejected\n"); - - for (i = 0; i < genpd->state_count; i++) { - ktime_t delta = 0; - s64 msecs; - - if ((genpd->status == GENPD_STATE_OFF) && - (genpd->state_idx == i)) - delta = ktime_sub(ktime_get(), genpd->accounting_time); - - msecs = ktime_to_ms( - ktime_add(genpd->states[i].idle_time, delta)); - seq_printf(s, "S%-13i %-14lld %-14llu %llu\n", i, msecs, - genpd->states[i].usage, genpd->states[i].rejected); - } - - genpd_unlock(genpd); - return ret; -} - -static int active_time_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - ktime_t delta = 0; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - if (genpd->status == GENPD_STATE_ON) - delta = ktime_sub(ktime_get(), genpd->accounting_time); - - seq_printf(s, "%lld ms\n", ktime_to_ms( - ktime_add(genpd->on_time, delta))); - - genpd_unlock(genpd); - return ret; -} - -static int total_idle_time_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - ktime_t delta = 0, total = 0; - unsigned int i; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - for (i = 0; i < genpd->state_count; i++) { - - if ((genpd->status == GENPD_STATE_OFF) && - (genpd->state_idx == i)) - delta = ktime_sub(ktime_get(), genpd->accounting_time); - - total = ktime_add(total, genpd->states[i].idle_time); - } - total = 
ktime_add(total, delta); - - seq_printf(s, "%lld ms\n", ktime_to_ms(total)); - - genpd_unlock(genpd); - return ret; -} - - -static int devices_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - struct pm_domain_data *pm_data; - const char *kobj_path; - int ret = 0; - - ret = genpd_lock_interruptible(genpd); - if (ret) - return -ERESTARTSYS; - - list_for_each_entry(pm_data, &genpd->dev_list, list_node) { - kobj_path = kobject_get_path(&pm_data->dev->kobj, - genpd_is_irq_safe(genpd) ? - GFP_ATOMIC : GFP_KERNEL); - if (kobj_path == NULL) - continue; - - seq_printf(s, "%s\n", kobj_path); - kfree(kobj_path); - } - - genpd_unlock(genpd); - return ret; -} - -static int perf_state_show(struct seq_file *s, void *data) -{ - struct generic_pm_domain *genpd = s->private; - - if (genpd_lock_interruptible(genpd)) - return -ERESTARTSYS; - - seq_printf(s, "%u\n", genpd->performance_state); - - genpd_unlock(genpd); - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(summary); -DEFINE_SHOW_ATTRIBUTE(status); -DEFINE_SHOW_ATTRIBUTE(sub_domains); -DEFINE_SHOW_ATTRIBUTE(idle_states); -DEFINE_SHOW_ATTRIBUTE(active_time); -DEFINE_SHOW_ATTRIBUTE(total_idle_time); -DEFINE_SHOW_ATTRIBUTE(devices); -DEFINE_SHOW_ATTRIBUTE(perf_state); - -static void genpd_debug_add(struct generic_pm_domain *genpd) -{ - struct dentry *d; - - if (!genpd_debugfs_dir) - return; - - d = debugfs_create_dir(genpd->name, genpd_debugfs_dir); - - debugfs_create_file("current_state", 0444, - d, genpd, &status_fops); - debugfs_create_file("sub_domains", 0444, - d, genpd, &sub_domains_fops); - debugfs_create_file("idle_states", 0444, - d, genpd, &idle_states_fops); - debugfs_create_file("active_time", 0444, - d, genpd, &active_time_fops); - debugfs_create_file("total_idle_time", 0444, - d, genpd, &total_idle_time_fops); - debugfs_create_file("devices", 0444, - d, genpd, &devices_fops); - if (genpd->set_performance_state) - debugfs_create_file("perf_state", 0444, - d, genpd, &perf_state_fops); -} - -static int __init genpd_debug_init(void) -{ - struct generic_pm_domain *genpd; - - genpd_debugfs_dir = debugfs_create_dir("pm_genpd", NULL); - - debugfs_create_file("pm_genpd_summary", S_IRUGO, genpd_debugfs_dir, - NULL, &summary_fops); - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) - genpd_debug_add(genpd); - - return 0; -} -late_initcall(genpd_debug_init); - -static void __exit genpd_debug_exit(void) -{ - debugfs_remove_recursive(genpd_debugfs_dir); -} -__exitcall(genpd_debug_exit); -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c deleted file mode 100644 index cd08c5885190..000000000000 --- a/drivers/base/power/domain_governor.c +++ /dev/null @@ -1,406 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * drivers/base/power/domain_governor.c - Governors for device PM domains. - * - * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. 
- */ -#include <linux/kernel.h> -#include <linux/pm_domain.h> -#include <linux/pm_qos.h> -#include <linux/hrtimer.h> -#include <linux/cpuidle.h> -#include <linux/cpumask.h> -#include <linux/ktime.h> - -static int dev_update_qos_constraint(struct device *dev, void *data) -{ - s64 *constraint_ns_p = data; - s64 constraint_ns; - - if (dev->power.subsys_data && dev->power.subsys_data->domain_data) { - /* - * Only take suspend-time QoS constraints of devices into - * account, because constraints updated after the device has - * been suspended are not guaranteed to be taken into account - * anyway. In order for them to take effect, the device has to - * be resumed and suspended again. - */ - constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; - } else { - /* - * The child is not in a domain and there's no info on its - * suspend/resume latencies, so assume them to be negligible and - * take its current PM QoS constraint (that's the only thing - * known at this point anyway). - */ - constraint_ns = dev_pm_qos_read_value(dev, DEV_PM_QOS_RESUME_LATENCY); - constraint_ns *= NSEC_PER_USEC; - } - - if (constraint_ns < *constraint_ns_p) - *constraint_ns_p = constraint_ns; - - return 0; -} - -/** - * default_suspend_ok - Default PM domain governor routine to suspend devices. - * @dev: Device to check. - */ -static bool default_suspend_ok(struct device *dev) -{ - struct gpd_timing_data *td = &dev_gpd_data(dev)->td; - unsigned long flags; - s64 constraint_ns; - - dev_dbg(dev, "%s()\n", __func__); - - spin_lock_irqsave(&dev->power.lock, flags); - - if (!td->constraint_changed) { - bool ret = td->cached_suspend_ok; - - spin_unlock_irqrestore(&dev->power.lock, flags); - return ret; - } - td->constraint_changed = false; - td->cached_suspend_ok = false; - td->effective_constraint_ns = 0; - constraint_ns = __dev_pm_qos_resume_latency(dev); - - spin_unlock_irqrestore(&dev->power.lock, flags); - - if (constraint_ns == 0) - return false; - - constraint_ns *= NSEC_PER_USEC; - /* - * We can walk the children without any additional locking, because - * they all have been suspended at this point and their - * effective_constraint_ns fields won't be modified in parallel with us. - */ - if (!dev->power.ignore_children) - device_for_each_child(dev, &constraint_ns, - dev_update_qos_constraint); - - if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS) { - /* "No restriction", so the device is allowed to suspend. */ - td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; - td->cached_suspend_ok = true; - } else if (constraint_ns == 0) { - /* - * This triggers if one of the children that don't belong to a - * domain has a zero PM QoS constraint and it's better not to - * suspend then. effective_constraint_ns is zero already and - * cached_suspend_ok is false, so bail out. - */ - return false; - } else { - constraint_ns -= td->suspend_latency_ns + - td->resume_latency_ns; - /* - * effective_constraint_ns is zero already and cached_suspend_ok - * is false, so if the computed value is not positive, return - * right away. - */ - if (constraint_ns <= 0) - return false; - - td->effective_constraint_ns = constraint_ns; - td->cached_suspend_ok = true; - } - - /* - * The children have been suspended already, so we don't need to take - * their suspend latencies into account here. 
- */ - return td->cached_suspend_ok; -} - -static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t now) -{ - ktime_t domain_wakeup = KTIME_MAX; - ktime_t next_wakeup; - struct pm_domain_data *pdd; - struct gpd_link *link; - - if (!(genpd->flags & GENPD_FLAG_MIN_RESIDENCY)) - return; - - /* - * Devices that have a predictable wakeup pattern, may specify - * their next wakeup. Let's find the next wakeup from all the - * devices attached to this domain and from all the sub-domains. - * It is possible that component's a next wakeup may have become - * stale when we read that here. We will ignore to ensure the domain - * is able to enter its optimal idle state. - */ - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - next_wakeup = to_gpd_data(pdd)->next_wakeup; - if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) - if (ktime_before(next_wakeup, domain_wakeup)) - domain_wakeup = next_wakeup; - } - - list_for_each_entry(link, &genpd->parent_links, parent_node) { - next_wakeup = link->child->next_wakeup; - if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) - if (ktime_before(next_wakeup, domain_wakeup)) - domain_wakeup = next_wakeup; - } - - genpd->next_wakeup = domain_wakeup; -} - -static bool next_wakeup_allows_state(struct generic_pm_domain *genpd, - unsigned int state, ktime_t now) -{ - ktime_t domain_wakeup = genpd->next_wakeup; - s64 idle_time_ns, min_sleep_ns; - - min_sleep_ns = genpd->states[state].power_off_latency_ns + - genpd->states[state].residency_ns; - - idle_time_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); - - return idle_time_ns >= min_sleep_ns; -} - -static bool __default_power_down_ok(struct dev_pm_domain *pd, - unsigned int state) -{ - struct generic_pm_domain *genpd = pd_to_genpd(pd); - struct gpd_link *link; - struct pm_domain_data *pdd; - s64 min_off_time_ns; - s64 off_on_time_ns; - - off_on_time_ns = genpd->states[state].power_off_latency_ns + - genpd->states[state].power_on_latency_ns; - - min_off_time_ns = -1; - /* - * Check if subdomains can be off for enough time. - * - * All subdomains have been powered off already at this point. - */ - list_for_each_entry(link, &genpd->parent_links, parent_node) { - struct generic_pm_domain *sd = link->child; - s64 sd_max_off_ns = sd->max_off_time_ns; - - if (sd_max_off_ns < 0) - continue; - - /* - * Check if the subdomain is allowed to be off long enough for - * the current domain to turn off and on (that's how much time - * it will have to wait worst case). - */ - if (sd_max_off_ns <= off_on_time_ns) - return false; - - if (min_off_time_ns > sd_max_off_ns || min_off_time_ns < 0) - min_off_time_ns = sd_max_off_ns; - } - - /* - * Check if the devices in the domain can be off enough time. - */ - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - struct gpd_timing_data *td; - s64 constraint_ns; - - /* - * Check if the device is allowed to be off long enough for the - * domain to turn off and on (that's how much time it will - * have to wait worst case). - */ - td = &to_gpd_data(pdd)->td; - constraint_ns = td->effective_constraint_ns; - /* - * Zero means "no suspend at all" and this runs only when all - * devices in the domain are suspended, so it must be positive. 
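A worked example of the latency budgeting above, with made-up numbers: a device advertises a resume-latency QoS constraint of 2000 us and timing data of 300 us suspend / 200 us resume latency, so default_suspend_ok() records an effective constraint of 2000 - (300 + 200) = 1500 us. For a domain state with 400 us power-off and 600 us power-on latency, off_on_time is 1000 us; 1500 us exceeds that, so the state is acceptable and max_off_time_ns ends up at 1500 - 600 = 900 us. The consumer side of such a constraint is an ordinary PM QoS request; a minimal, hypothetical snippet (value in microseconds, the request object must outlive its use):

#include <linux/pm_qos.h>

static struct dev_pm_qos_request foo_resume_lat_req;

static int foo_limit_resume_latency(struct device *dev)
{
	/* Ask not to be left unresumable for longer than 2000 us. */
	return dev_pm_qos_add_request(dev, &foo_resume_lat_req,
				      DEV_PM_QOS_RESUME_LATENCY, 2000);
}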
- */ - if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS) - continue; - - if (constraint_ns <= off_on_time_ns) - return false; - - if (min_off_time_ns > constraint_ns || min_off_time_ns < 0) - min_off_time_ns = constraint_ns; - } - - /* - * If the computed minimum device off time is negative, there are no - * latency constraints, so the domain can spend arbitrary time in the - * "off" state. - */ - if (min_off_time_ns < 0) - return true; - - /* - * The difference between the computed minimum subdomain or device off - * time and the time needed to turn the domain on is the maximum - * theoretical time this domain can spend in the "off" state. - */ - genpd->max_off_time_ns = min_off_time_ns - - genpd->states[state].power_on_latency_ns; - return true; -} - -/** - * _default_power_down_ok - Default generic PM domain power off governor routine. - * @pd: PM domain to check. - * @now: current ktime. - * - * This routine must be executed under the PM domain's lock. - */ -static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) -{ - struct generic_pm_domain *genpd = pd_to_genpd(pd); - int state_idx = genpd->state_count - 1; - struct gpd_link *link; - - /* - * Find the next wakeup from devices that can determine their own wakeup - * to find when the domain would wakeup and do it for every device down - * the hierarchy. It is not worth while to sleep if the state's residency - * cannot be met. - */ - update_domain_next_wakeup(genpd, now); - if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) { - /* Let's find out the deepest domain idle state, the devices prefer */ - while (state_idx >= 0) { - if (next_wakeup_allows_state(genpd, state_idx, now)) { - genpd->max_off_time_changed = true; - break; - } - state_idx--; - } - - if (state_idx < 0) { - state_idx = 0; - genpd->cached_power_down_ok = false; - goto done; - } - } - - if (!genpd->max_off_time_changed) { - genpd->state_idx = genpd->cached_power_down_state_idx; - return genpd->cached_power_down_ok; - } - - /* - * We have to invalidate the cached results for the parents, so - * use the observation that default_power_down_ok() is not - * going to be called for any parent until this instance - * returns. - */ - list_for_each_entry(link, &genpd->child_links, child_node) - link->parent->max_off_time_changed = true; - - genpd->max_off_time_ns = -1; - genpd->max_off_time_changed = false; - genpd->cached_power_down_ok = true; - - /* - * Find a state to power down to, starting from the state - * determined by the next wakeup. - */ - while (!__default_power_down_ok(pd, state_idx)) { - if (state_idx == 0) { - genpd->cached_power_down_ok = false; - break; - } - state_idx--; - } - -done: - genpd->state_idx = state_idx; - genpd->cached_power_down_state_idx = genpd->state_idx; - return genpd->cached_power_down_ok; -} - -static bool default_power_down_ok(struct dev_pm_domain *pd) -{ - return _default_power_down_ok(pd, ktime_get()); -} - -static bool always_on_power_down_ok(struct dev_pm_domain *domain) -{ - return false; -} - -#ifdef CONFIG_CPU_IDLE -static bool cpu_power_down_ok(struct dev_pm_domain *pd) -{ - struct generic_pm_domain *genpd = pd_to_genpd(pd); - struct cpuidle_device *dev; - ktime_t domain_wakeup, next_hrtimer; - ktime_t now = ktime_get(); - s64 idle_duration_ns; - int cpu, i; - - /* Validate dev PM QoS constraints. 
*/ - if (!_default_power_down_ok(pd, now)) - return false; - - if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) - return true; - - /* - * Find the next wakeup for any of the online CPUs within the PM domain - * and its subdomains. Note, we only need the genpd->cpus, as it already - * contains a mask of all CPUs from subdomains. - */ - domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); - for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { - dev = per_cpu(cpuidle_devices, cpu); - if (dev) { - next_hrtimer = READ_ONCE(dev->next_hrtimer); - if (ktime_before(next_hrtimer, domain_wakeup)) - domain_wakeup = next_hrtimer; - } - } - - /* The minimum idle duration is from now - until the next wakeup. */ - idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); - if (idle_duration_ns <= 0) - return false; - - /* - * Find the deepest idle state that has its residency value satisfied - * and by also taking into account the power off latency for the state. - * Start at the state picked by the dev PM QoS constraint validation. - */ - i = genpd->state_idx; - do { - if (idle_duration_ns >= (genpd->states[i].residency_ns + - genpd->states[i].power_off_latency_ns)) { - genpd->state_idx = i; - return true; - } - } while (--i >= 0); - - return false; -} - -struct dev_power_governor pm_domain_cpu_gov = { - .suspend_ok = default_suspend_ok, - .power_down_ok = cpu_power_down_ok, -}; -#endif - -struct dev_power_governor simple_qos_governor = { - .suspend_ok = default_suspend_ok, - .power_down_ok = default_power_down_ok, -}; - -/** - * pm_genpd_gov_always_on - A governor implementing an always-on policy - */ -struct dev_power_governor pm_domain_always_on_gov = { - .power_down_ok = always_on_power_down_ok, - .suspend_ok = default_suspend_ok, -}; diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 4fa525668cb7..af99bbcf281c 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -8,6 +8,13 @@ #include <linux/pm_runtime.h> #include <linux/export.h> +#define CALL_PM_OP(dev, op) \ +({ \ + struct device *_dev = (dev); \ + const struct dev_pm_ops *pm = _dev->driver ? _dev->driver->pm : NULL; \ + pm && pm->op ? pm->op(_dev) : 0; \ +}) + #ifdef CONFIG_PM /** * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems. @@ -19,12 +26,7 @@ */ int pm_generic_runtime_suspend(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int ret; - - ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0; - - return ret; + return CALL_PM_OP(dev, runtime_suspend); } EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend); @@ -38,12 +40,7 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend); */ int pm_generic_runtime_resume(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int ret; - - ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0; - - return ret; + return CALL_PM_OP(dev, runtime_resume); } EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); #endif /* CONFIG_PM */ @@ -72,9 +69,7 @@ int pm_generic_prepare(struct device *dev) */ int pm_generic_suspend_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0; + return CALL_PM_OP(dev, suspend_noirq); } EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); @@ -84,9 +79,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq); */ int pm_generic_suspend_late(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; - - return pm && pm->suspend_late ? pm->suspend_late(dev) : 0; + return CALL_PM_OP(dev, suspend_late); } EXPORT_SYMBOL_GPL(pm_generic_suspend_late); @@ -96,9 +89,7 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_late); */ int pm_generic_suspend(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->suspend ? pm->suspend(dev) : 0; + return CALL_PM_OP(dev, suspend); } EXPORT_SYMBOL_GPL(pm_generic_suspend); @@ -108,33 +99,17 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend); */ int pm_generic_freeze_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0; + return CALL_PM_OP(dev, freeze_noirq); } EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq); /** - * pm_generic_freeze_late - Generic freeze_late callback for subsystems. - * @dev: Device to freeze. - */ -int pm_generic_freeze_late(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze_late ? pm->freeze_late(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_freeze_late); - -/** * pm_generic_freeze - Generic freeze callback for subsystems. * @dev: Device to freeze. */ int pm_generic_freeze(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze ? pm->freeze(dev) : 0; + return CALL_PM_OP(dev, freeze); } EXPORT_SYMBOL_GPL(pm_generic_freeze); @@ -144,9 +119,7 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze); */ int pm_generic_poweroff_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0; + return CALL_PM_OP(dev, poweroff_noirq); } EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq); @@ -156,9 +129,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq); */ int pm_generic_poweroff_late(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff_late ? pm->poweroff_late(dev) : 0; + return CALL_PM_OP(dev, poweroff_late); } EXPORT_SYMBOL_GPL(pm_generic_poweroff_late); @@ -168,9 +139,7 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff_late); */ int pm_generic_poweroff(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->poweroff ? pm->poweroff(dev) : 0; + return CALL_PM_OP(dev, poweroff); } EXPORT_SYMBOL_GPL(pm_generic_poweroff); @@ -180,33 +149,17 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff); */ int pm_generic_thaw_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0; + return CALL_PM_OP(dev, thaw_noirq); } EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq); /** - * pm_generic_thaw_early - Generic thaw_early callback for subsystems. - * @dev: Device to thaw. - */ -int pm_generic_thaw_early(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw_early ? pm->thaw_early(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_thaw_early); - -/** * pm_generic_thaw - Generic thaw callback for subsystems. * @dev: Device to thaw. */ int pm_generic_thaw(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw ? 
pm->thaw(dev) : 0; + return CALL_PM_OP(dev, thaw); } EXPORT_SYMBOL_GPL(pm_generic_thaw); @@ -216,9 +169,7 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw); */ int pm_generic_resume_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0; + return CALL_PM_OP(dev, resume_noirq); } EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); @@ -228,9 +179,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); */ int pm_generic_resume_early(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume_early ? pm->resume_early(dev) : 0; + return CALL_PM_OP(dev, resume_early); } EXPORT_SYMBOL_GPL(pm_generic_resume_early); @@ -240,9 +189,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_early); */ int pm_generic_resume(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->resume ? pm->resume(dev) : 0; + return CALL_PM_OP(dev, resume); } EXPORT_SYMBOL_GPL(pm_generic_resume); @@ -252,9 +199,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume); */ int pm_generic_restore_noirq(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0; + return CALL_PM_OP(dev, restore_noirq); } EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); @@ -264,9 +209,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); */ int pm_generic_restore_early(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore_early ? pm->restore_early(dev) : 0; + return CALL_PM_OP(dev, restore_early); } EXPORT_SYMBOL_GPL(pm_generic_restore_early); @@ -276,9 +219,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_early); */ int pm_generic_restore(struct device *dev) { - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->restore ? pm->restore(dev) : 0; + return CALL_PM_OP(dev, restore); } EXPORT_SYMBOL_GPL(pm_generic_restore); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d568772152c2..97a8b4fcf471 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -32,19 +32,15 @@ #include <linux/suspend.h> #include <trace/events/power.h> #include <linux/cpufreq.h> -#include <linux/cpuidle.h> #include <linux/devfreq.h> #include <linux/timer.h> +#include <linux/nmi.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); -#define list_for_each_entry_rcu_locked(pos, head, member) \ - list_for_each_entry_rcu(pos, head, member, \ - device_links_read_lock_held()) - /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and @@ -61,12 +57,26 @@ static LIST_HEAD(dpm_suspended_list); static LIST_HEAD(dpm_late_early_list); static LIST_HEAD(dpm_noirq_list); -struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; +static DEFINE_MUTEX(async_wip_mtx); static int async_error; +/** + * pm_hibernate_is_recovering - if recovering from hibernate due to error. + * + * Used to query if dev_pm_ops.thaw() is called for normal hibernation case or + * recovering from some error. + * + * Return: true for error case, false for normal case. 
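A hypothetical use of the new helper from a driver's thaw() callback, distinguishing the normal post-image thaw from error recovery; the foo_* helpers and private data are assumptions standing in for driver-specific re-initialisation.

static int foo_thaw(struct device *dev)
{
	struct foo_priv *priv = dev_get_drvdata(dev);	/* assumed private data */

	if (pm_hibernate_is_recovering())
		return foo_thaw_recover(priv);		/* assumed helper */

	return foo_thaw_after_image(priv);		/* assumed helper */
}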
+ */ +bool pm_hibernate_is_recovering(void) +{ + return pm_transition.event == PM_EVENT_RECOVER; +} +EXPORT_SYMBOL_GPL(pm_hibernate_is_recovering); + static const char *pm_verb(int event) { switch (event) { @@ -86,6 +96,8 @@ static const char *pm_verb(int event) return "restore"; case PM_EVENT_RECOVER: return "recover"; + case PM_EVENT_POWEROFF: + return "poweroff"; default: return "(unknown PM event)"; } @@ -210,7 +222,7 @@ static ktime_t initcall_debug_start(struct device *dev, void *cb) if (!pm_print_times_enabled) return 0; - dev_info(dev, "calling %pS @ %i, parent: %s\n", cb, + dev_info(dev, "calling %ps @ %i, parent: %s\n", cb, task_pid_nr(current), dev->parent ? dev_name(dev->parent) : "none"); return ktime_get(); @@ -225,7 +237,7 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime, return; rettime = ktime_get(); - dev_info(dev, "%pS returned %d after %Ld usecs\n", cb, error, + dev_info(dev, "%ps returned %d after %Ld usecs\n", cb, error, (unsigned long long)ktime_us_delta(rettime, calltime)); } @@ -251,7 +263,7 @@ static int dpm_wait_fn(struct device *dev, void *async_ptr) static void dpm_wait_for_children(struct device *dev, bool async) { - device_for_each_child(dev, &async, dpm_wait_fn); + device_for_each_child(dev, &async, dpm_wait_fn); } static void dpm_wait_for_suppliers(struct device *dev, bool async) @@ -268,8 +280,9 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) * callbacks freeing the link objects for the links in the list we're * walking. */ - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) - if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dev_for_each_link_to_supplier(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT && + !device_link_flag_is_sync_state_only(link->flags)) dpm_wait(link->supplier, async); device_links_read_unlock(idx); @@ -325,8 +338,9 @@ static void dpm_wait_for_consumers(struct device *dev, bool async) * continue instead of trying to continue in parallel with its * unregistration). 
*/ - list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) - if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dev_for_each_link_to_consumer(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT && + !device_link_flag_is_sync_state_only(link->flags)) dpm_wait(link->consumer, async); device_links_read_unlock(idx); @@ -356,6 +370,7 @@ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff; case PM_EVENT_THAW: @@ -390,6 +405,7 @@ static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops, case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_late; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff_late; case PM_EVENT_THAW: @@ -424,6 +440,7 @@ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t stat case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_noirq; + case PM_EVENT_POWEROFF: case PM_EVENT_HIBERNATE: return ops->poweroff_noirq; case PM_EVENT_THAW: @@ -486,7 +503,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); trace_device_pm_callback_end(dev, error); - suspend_report_result(cb, error); + suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); @@ -498,11 +515,17 @@ struct dpm_watchdog { struct device *dev; struct task_struct *tsk; struct timer_list timer; + bool fatal; }; #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ struct dpm_watchdog wd +static bool __read_mostly dpm_watchdog_all_cpu_backtrace; +module_param(dpm_watchdog_all_cpu_backtrace, bool, 0644); +MODULE_PARM_DESC(dpm_watchdog_all_cpu_backtrace, + "Backtrace all CPUs on DPM watchdog timeout"); + /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. * @t: The timer that PM watchdog depends on. 
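The handler that follows implements a two-stage timeout: the timer is first armed for CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT seconds and, on expiry, only warns (optionally dumping backtraces of all other CPUs when dpm_watchdog_all_cpu_backtrace is set), then re-arms itself for the remaining CONFIG_DPM_WATCHDOG_TIMEOUT minus CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT seconds and panics on the second expiry. With hypothetical values of 60 and 120 seconds, a stuck callback produces a warning with a stack dump after 60 s and the unrecoverable-failure panic 60 s later; when the two options are configured to the same value, wd->fatal starts out true and the behaviour degenerates to the previous single-shot panic.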
@@ -513,12 +536,28 @@ struct dpm_watchdog { */ static void dpm_watchdog_handler(struct timer_list *t) { - struct dpm_watchdog *wd = from_timer(wd, t, timer); + struct dpm_watchdog *wd = timer_container_of(wd, t, timer); + struct timer_list *timer = &wd->timer; + unsigned int time_left; + + if (wd->fatal) { + unsigned int this_cpu = smp_processor_id(); - dev_emerg(wd->dev, "**** DPM device timeout ****\n"); - show_stack(wd->tsk, NULL, KERN_EMERG); - panic("%s %s: unrecoverable failure\n", - dev_driver_string(wd->dev), dev_name(wd->dev)); + dev_emerg(wd->dev, "**** DPM device timeout ****\n"); + show_stack(wd->tsk, NULL, KERN_EMERG); + if (dpm_watchdog_all_cpu_backtrace) + trigger_allbutcpu_cpu_backtrace(this_cpu); + panic("%s %s: unrecoverable failure\n", + dev_driver_string(wd->dev), dev_name(wd->dev)); + } + + time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; + dev_warn(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n", + CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left); + show_stack(wd->tsk, NULL, KERN_WARNING); + + wd->fatal = true; + mod_timer(timer, jiffies + HZ * time_left); } /** @@ -532,10 +571,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) wd->dev = dev; wd->tsk = current; + wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; timer_setup_on_stack(timer, dpm_watchdog_handler, 0); /* use same timeout value for both suspend and resume */ - timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; + timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT; add_timer(timer); } @@ -547,8 +587,8 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) { struct timer_list *timer = &wd->timer; - del_timer_sync(timer); - destroy_timer_on_stack(timer); + timer_delete_sync(timer); + timer_destroy_on_stack(timer); } #else #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) @@ -579,6 +619,102 @@ bool dev_pm_skip_resume(struct device *dev) return !dev->power.must_resume; } +static bool is_async(struct device *dev) +{ + return dev->power.async_suspend && pm_async_enabled + && !pm_trace_is_enabled(); +} + +static bool __dpm_async(struct device *dev, async_func_t func) +{ + if (dev->power.work_in_progress) + return true; + + if (!is_async(dev)) + return false; + + dev->power.work_in_progress = true; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) + return true; + + put_device(dev); + + return false; +} + +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + guard(mutex)(&async_wip_mtx); + + return __dpm_async(dev, func); +} + +static int dpm_async_with_cleanup(struct device *dev, void *fn) +{ + guard(mutex)(&async_wip_mtx); + + if (!__dpm_async(dev, fn)) + dev->power.work_in_progress = false; + + return 0; +} + +static void dpm_async_resume_children(struct device *dev, async_func_t func) +{ + /* + * Prevent racing with dpm_clear_async_state() during initial list + * walks in dpm_noirq_resume_devices(), dpm_resume_early(), and + * dpm_resume(). + */ + guard(mutex)(&dpm_list_mtx); + + /* + * Start processing "async" children of the device unless it's been + * started already for them. + */ + device_for_each_child(dev, func, dpm_async_with_cleanup); +} + +static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) +{ + struct device_link *link; + int idx; + + dpm_async_resume_children(dev, func); + + idx = device_links_read_lock(); + + /* Start processing the device's "async" consumers. 
*/ + dev_for_each_link_to_consumer(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dpm_async_with_cleanup(link->consumer, func); + + device_links_read_unlock(idx); +} + +static void dpm_clear_async_state(struct device *dev) +{ + reinit_completion(&dev->power.completion); + dev->power.work_in_progress = false; +} + +static bool dpm_root_device(struct device *dev) +{ + lockdep_assert_held(&dpm_list_mtx); + + /* + * Since this function is required to run under dpm_list_mtx, the + * list_empty() below will only return true if the device's list of + * consumers is actually empty before calling it. + */ + return !dev->parent && list_empty(&dev->links.suppliers); +} + +static void async_resume_noirq(void *data, async_cookie_t cookie); + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -588,7 +724,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -601,8 +737,20 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn if (dev->power.syscore || dev->power.direct_complete) goto Out; - if (!dev->power.is_noirq_suspended) + if (!dev->power.is_noirq_suspended) { + /* + * This means that system suspend has been aborted in the noirq + * phase before invoking the noirq suspend callback for the + * device, so if device_suspend_late() has left it in suspend, + * device_resume_early() should leave it in suspend either in + * case the early resume of it depends on the noirq resume that + * has not run. + */ + if (dev_pm_skip_suspend(dev)) + dev->power.must_resume = false; + goto Out; + } if (!dpm_wait_for_superior(dev, async)) goto Out; @@ -615,12 +763,12 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set - * to avoid confusing drivers that don't use it. + * status to "active" unless its power.smart_suspend flag is clear, in + * which case it is not necessary to update its PM-runtime status. */ if (skip_resume) pm_runtime_set_suspended(dev); - else if (dev_pm_skip_suspend(dev)) + else if (dev_pm_smart_suspend(dev)) pm_runtime_set_active(dev); if (dev->pm_domain) { @@ -656,37 +804,21 @@ Skip: Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; -} - -static bool is_async(struct device *dev) -{ - return dev->power.async_suspend && pm_async_enabled - && !pm_trace_is_enabled(); -} -static bool dpm_async_fn(struct device *dev, async_func_t func) -{ - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); - return true; + if (error) { + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? 
" async noirq" : " noirq", error); } - return false; + dpm_async_resume_subordinate(dev, async_resume_noirq); } static void async_resume_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + struct device *dev = data; + device_resume_noirq(dev, pm_transition, true); put_device(dev); } @@ -696,41 +828,44 @@ static void dpm_noirq_resume_devices(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); - mutex_lock(&dpm_list_mtx); + + async_error = 0; pm_transition = state; + mutex_lock(&dpm_list_mtx); + /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); + list_for_each_entry(dev, &dpm_noirq_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_noirq); + } while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); - get_device(dev); list_move_tail(&dev->power.entry, &dpm_late_early_list); - mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; + if (!dpm_async_fn(dev, async_resume_noirq)) { + get_device(dev); - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + mutex_unlock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - put_device(dev); + device_resume_noirq(dev, state, false); + + put_device(dev); + + mutex_lock(&dpm_list_mtx); + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "noirq"); + if (READ_ONCE(async_error)) + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } @@ -747,10 +882,10 @@ void dpm_resume_noirq(pm_message_t state) resume_device_irqs(); device_wakeup_disarm_wake_irqs(); - - cpuidle_resume(); } +static void async_resume_early(void *data, async_cookie_t cookie); + /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. @@ -759,7 +894,7 @@ void dpm_resume_noirq(pm_message_t state) * * Runtime PM is disabled for @dev while this function is being executed. 
*/ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -768,12 +903,15 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore || dev->power.direct_complete) + if (dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) goto Out; + if (dev->power.syscore) + goto Skip; + if (!dpm_wait_for_superior(dev, async)) goto Out; @@ -806,24 +944,27 @@ Run: Skip: dev->power.is_late_suspended = false; + pm_runtime_enable(dev); Out: TRACE_RESUME(error); - pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } + + dpm_async_resume_subordinate(dev, async_resume_early); } static void async_resume_early(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + struct device *dev = data; + device_resume_early(dev, pm_transition, true); put_device(dev); } @@ -837,40 +978,44 @@ void dpm_resume_early(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); - mutex_lock(&dpm_list_mtx); + + async_error = 0; pm_transition = state; + mutex_lock(&dpm_list_mtx); + /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); + list_for_each_entry(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_early); + } while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); - get_device(dev); list_move_tail(&dev->power.entry, &dpm_suspended_list); - mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; + if (!dpm_async_fn(dev, async_resume_early)) { + get_device(dev); - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } + mutex_unlock(&dpm_list_mtx); + + device_resume_early(dev, state, false); + + put_device(dev); + + mutex_lock(&dpm_list_mtx); } - mutex_lock(&dpm_list_mtx); - put_device(dev); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "early"); + if (READ_ONCE(async_error)) + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } @@ -885,13 +1030,15 @@ void dpm_resume_start(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_resume_start); +static void async_resume(void *data, async_cookie_t cookie); + /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. 
*/ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -904,8 +1051,20 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (!dev->power.is_suspended) + goto Complete; + + dev->power.is_suspended = false; + if (dev->power.direct_complete) { - /* Match the pm_runtime_disable() in __device_suspend(). */ + /* + * Allow new children to be added under the device after this + * point if it has no PM callbacks. + */ + if (dev->power.no_pm_callbacks) + dev->power.is_prepared = false; + + /* Match the pm_runtime_disable() in device_suspend(). */ pm_runtime_enable(dev); goto Complete; } @@ -922,9 +1081,6 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) */ dev->power.is_prepared = false; - if (!dev->power.is_suspended) - goto Unlock; - if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); @@ -962,9 +1118,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) End: error = dpm_run_callback(callback, dev, state, info); - dev->power.is_suspended = false; - Unlock: device_unlock(dev); dpm_watchdog_clear(&wd); @@ -973,17 +1127,20 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } + + dpm_async_resume_subordinate(dev, async_resume); } static void async_resume(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; - int error; + struct device *dev = data; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + device_resume(dev, pm_transition, true); put_device(dev); } @@ -1000,40 +1157,43 @@ void dpm_resume(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume"), state.event, true); - might_sleep(); - mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); + mutex_lock(&dpm_list_mtx); + + /* + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. 
+ */ + list_for_each_entry(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume); + } while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); - get_device(dev); - if (!is_async(dev)) { - int error; + list_move_tail(&dev->power.entry, &dpm_prepared_list); + + if (!dpm_async_fn(dev, async_resume)) { + get_device(dev); mutex_unlock(&dpm_list_mtx); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + device_resume(dev, state, false); + + put_device(dev); mutex_lock(&dpm_list_mtx); } - if (!list_empty(&dev->power.entry)) - list_move_tail(&dev->power.entry, &dpm_prepared_list); - put_device(dev); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, NULL); + if (READ_ONCE(async_error)) + dpm_save_failed_step(SUSPEND_RESUME); cpufreq_resume(); devfreq_resume(); @@ -1051,7 +1211,7 @@ static void device_complete(struct device *dev, pm_message_t state) const char *info = NULL; if (dev->power.syscore) - return; + goto out; device_lock(dev); @@ -1081,6 +1241,9 @@ static void device_complete(struct device *dev, pm_message_t state) device_unlock(dev); +out: + /* If enabling runtime PM for the device is blocked, unblock it. */ + pm_runtime_unblock(dev); pm_runtime_put(dev); } @@ -1096,7 +1259,6 @@ void dpm_complete(pm_message_t state) struct list_head list; trace_suspend_resume(TPS("dpm_complete"), state.event, true); - might_sleep(); INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); @@ -1106,14 +1268,16 @@ void dpm_complete(pm_message_t state) get_device(dev); dev->power.is_prepared = false; list_move(&dev->power.entry, &list); + mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); device_complete(dev, state); trace_device_pm_callback_end(dev, 0); - mutex_lock(&dpm_list_mtx); put_device(dev); + + mutex_lock(&dpm_list_mtx); } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); @@ -1133,6 +1297,7 @@ void dpm_complete(pm_message_t state) void dpm_resume_end(pm_message_t state) { dpm_resume(state); + pm_restore_gfp_mask(); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); @@ -1140,6 +1305,82 @@ EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ +static bool dpm_leaf_device(struct device *dev) +{ + struct device *child; + + lockdep_assert_held(&dpm_list_mtx); + + child = device_find_any_child(dev); + if (child) { + put_device(child); + + return false; + } + + /* + * Since this function is required to run under dpm_list_mtx, the + * list_empty() below will only return true if the device's list of + * consumers is actually empty before calling it. + */ + return list_empty(&dev->links.consumers); +} + +static bool dpm_async_suspend_parent(struct device *dev, async_func_t func) +{ + guard(mutex)(&dpm_list_mtx); + + /* + * If the device is suspended asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by checking + * if the device has been deleted already as the parent cannot be + * deleted before it. + */ + if (!device_pm_initialized(dev)) + return false; + + /* Start processing the device's parent if it is "async". 
*/ + if (dev->parent) + dpm_async_with_cleanup(dev->parent, func); + + return true; +} + +static void dpm_async_suspend_superior(struct device *dev, async_func_t func) +{ + struct device_link *link; + int idx; + + if (!dpm_async_suspend_parent(dev, func)) + return; + + idx = device_links_read_lock(); + + /* Start processing the device's "async" suppliers. */ + dev_for_each_link_to_supplier(link, dev) + if (READ_ONCE(link->status) != DL_STATE_DORMANT) + dpm_async_with_cleanup(link->supplier, func); + + device_links_read_unlock(idx); +} + +static void dpm_async_suspend_complete_all(struct list_head *device_list) +{ + struct device *dev; + + guard(mutex)(&async_wip_mtx); + + list_for_each_entry_reverse(dev, device_list, power.entry) { + /* + * In case the device is being waited for and async processing + * has not started for it yet, let the waiters make progress. + */ + if (!dev->power.work_in_progress) + complete_all(&dev->power.completion); + } +} + /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. @@ -1171,14 +1412,16 @@ static void dpm_superior_set_must_resume(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + dev_for_each_link_to_supplier(link, dev) link->supplier->power.must_resume = true; device_links_read_unlock(idx); } +static void async_suspend_noirq(void *data, async_cookie_t cookie); + /** - * __device_suspend_noirq - Execute a "noirq suspend" callback for given device. + * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. @@ -1186,7 +1429,7 @@ static void dpm_superior_set_must_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async) +static void device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1197,7 +1440,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a dpm_wait_for_subordinate(dev, async); - if (async_error) + if (READ_ONCE(async_error)) goto Complete; if (dev->power.syscore || dev->power.direct_complete) @@ -1230,7 +1473,9 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a Run: error = dpm_run_callback(callback, dev, state, info); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); goto Complete; } @@ -1238,14 +1483,13 @@ Skip: dev->power.is_noirq_suspended = true; /* - * Skipping the resume of devices that were in use right before the - * system suspend (as indicated by their PM-runtime usage counters) - * would be suboptimal. Also resume them if doing that is not allowed - * to be skipped. + * Devices must be resumed unless they are explicitly allowed to be left + * in suspend, but even in that case skipping the resume of devices that + * were in use right before the system suspend (as indicated by their + * runtime PM usage counters and child counters) would be suboptimal. 
*/ - if (atomic_read(&dev->power.usage_count) > 1 || - !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && - dev->power.may_skip_resume)) + if (!(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && + dev->power.may_skip_resume) || !pm_runtime_need_not_resume(dev)) dev->power.must_resume = true; if (dev->power.must_resume) @@ -1254,72 +1498,81 @@ Skip: Complete: complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; -} - -static void async_suspend_noirq(void *data, async_cookie_t cookie) -{ - struct device *dev = (struct device *)data; - int error; - error = __device_suspend_noirq(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } + if (error || READ_ONCE(async_error)) + return; - put_device(dev); + dpm_async_suspend_superior(dev, async_suspend_noirq); } -static int device_suspend_noirq(struct device *dev) +static void async_suspend_noirq(void *data, async_cookie_t cookie) { - if (dpm_async_fn(dev, async_suspend_noirq)) - return 0; + struct device *dev = data; - return __device_suspend_noirq(dev, pm_transition, false); + device_suspend_noirq(dev, pm_transition, true); + put_device(dev); } static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); - mutex_lock(&dpm_list_mtx); + pm_transition = state; async_error = 0; + mutex_lock(&dpm_list_mtx); + + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_noirq); + } + while (!list_empty(&dpm_late_early_list)) { - struct device *dev = to_device(dpm_late_early_list.prev); + dev = to_device(dpm_late_early_list.prev); + + list_move(&dev->power.entry, &dpm_noirq_list); + + if (dpm_async_fn(dev, async_suspend_noirq)) + continue; get_device(dev); + mutex_unlock(&dpm_list_mtx); - error = device_suspend_noirq(dev); + device_suspend_noirq(dev, state, false); - mutex_lock(&dpm_list_mtx); - if (error) { - pm_dev_err(dev, state, " noirq", error); - dpm_save_failed_dev(dev_name(dev)); - put_device(dev); - break; - } - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); - if (async_error) + mutex_lock(&dpm_list_mtx); + + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_late_early_list); + /* + * Move all devices to the target list to resume them + * properly. 
+ */ + list_splice_init(&dpm_late_early_list, &dpm_noirq_list); break; + } } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); - if (!error) - error = async_error; - if (error) { - suspend_stats.failed_suspend_noirq++; + error = READ_ONCE(async_error); + if (error) dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); - } + dpm_show_time(starttime, state, error, "noirq"); trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); return error; @@ -1336,8 +1589,6 @@ int dpm_suspend_noirq(pm_message_t state) { int ret; - cpuidle_pause(); - device_wakeup_arm_wake_irqs(); suspend_device_irqs(); @@ -1363,15 +1614,17 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) spin_unlock_irq(&parent->power.lock); } +static void async_suspend_late(void *data, async_cookie_t cookie); + /** - * __device_suspend_late - Execute a "late suspend" callback for given device. + * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int __device_suspend_late(struct device *dev, pm_message_t state, bool async) +static void device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1380,21 +1633,28 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as TRACE_DEVICE(dev); TRACE_SUSPEND(0); - __pm_runtime_disable(dev, false); - dpm_wait_for_subordinate(dev, async); - if (async_error) + if (READ_ONCE(async_error)) goto Complete; if (pm_wakeup_pending()) { - async_error = -EBUSY; + WRITE_ONCE(async_error, -EBUSY); goto Complete; } - if (dev->power.syscore || dev->power.direct_complete) + if (dev->power.direct_complete) goto Complete; + /* + * Disable runtime PM for the device without checking if there is a + * pending resume request for it. + */ + __pm_runtime_disable(dev, false); + + if (dev->power.syscore) + goto Skip; + if (dev->pm_domain) { info = "late power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); @@ -1422,7 +1682,10 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as Run: error = dpm_run_callback(callback, dev, state, info); if (error) { - async_error = error; + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? 
" async late" : " late", error); + pm_runtime_enable(dev); goto Complete; } dpm_propagate_wakeup_to_parent(dev); @@ -1433,28 +1696,19 @@ Skip: Complete: TRACE_SUSPEND(error); complete_all(&dev->power.completion); - return error; -} -static void async_suspend_late(void *data, async_cookie_t cookie) -{ - struct device *dev = (struct device *)data; - int error; + if (error || READ_ONCE(async_error)) + return; - error = __device_suspend_late(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } - put_device(dev); + dpm_async_suspend_superior(dev, async_suspend_late); } -static int device_suspend_late(struct device *dev) +static void async_suspend_late(void *data, async_cookie_t cookie) { - if (dpm_async_fn(dev, async_suspend_late)) - return 0; + struct device *dev = data; - return __device_suspend_late(dev, pm_transition, false); + device_suspend_late(dev, pm_transition, true); + put_device(dev); } /** @@ -1464,42 +1718,63 @@ static int device_suspend_late(struct device *dev) int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); - mutex_lock(&dpm_list_mtx); + pm_transition = state; async_error = 0; + wake_up_all_idle_cpus(); + + mutex_lock(&dpm_list_mtx); + + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_late); + } + while (!list_empty(&dpm_suspended_list)) { - struct device *dev = to_device(dpm_suspended_list.prev); + dev = to_device(dpm_suspended_list.prev); + + list_move(&dev->power.entry, &dpm_late_early_list); + + if (dpm_async_fn(dev, async_suspend_late)) + continue; get_device(dev); + mutex_unlock(&dpm_list_mtx); - error = device_suspend_late(dev); + device_suspend_late(dev, state, false); + + put_device(dev); mutex_lock(&dpm_list_mtx); - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_late_early_list); - if (error) { - pm_dev_err(dev, state, " late", error); - dpm_save_failed_dev(dev_name(dev)); - put_device(dev); + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_suspended_list); + /* + * Move all devices to the target list to resume them + * properly. 
+ */ + list_splice_init(&dpm_suspended_list, &dpm_late_early_list); break; } - put_device(dev); - - if (async_error) - break; } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); - if (!error) - error = async_error; + + error = READ_ONCE(async_error); if (error) { - suspend_stats.failed_suspend_late++; dpm_save_failed_step(SUSPEND_SUSPEND_LATE); dpm_resume_early(resume_event(state)); } @@ -1550,7 +1825,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state, trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); trace_device_pm_callback_end(dev, error); - suspend_report_result(cb, error); + suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); @@ -1570,7 +1845,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + dev_for_each_link_to_supplier(link, dev) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); @@ -1579,13 +1854,15 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) device_links_read_unlock(idx); } +static void async_suspend(void *data, async_cookie_t cookie); + /** - * __device_suspend - Execute "suspend" callbacks for given device. + * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. */ -static int __device_suspend(struct device *dev, pm_message_t state, bool async) +static void device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1597,7 +1874,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_subordinate(dev, async); - if (async_error) { + if (READ_ONCE(async_error)) { dev->power.direct_complete = false; goto Complete; } @@ -1617,7 +1894,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (pm_wakeup_pending()) { dev->power.direct_complete = false; - async_error = -EBUSY; + WRITE_ONCE(async_error, -EBUSY); goto Complete; } @@ -1633,6 +1910,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); + dev->power.is_suspended = true; goto Complete; } @@ -1642,7 +1920,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } dev->power.may_skip_resume = true; - dev->power.must_resume = false; + dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1699,34 +1977,27 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_watchdog_clear(&wd); Complete: - if (error) - async_error = error; + if (error) { + WRITE_ONCE(async_error, error); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? 
" async" : "", error); + } complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; -} -static void async_suspend(void *data, async_cookie_t cookie) -{ - struct device *dev = (struct device *)data; - int error; - - error = __device_suspend(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } + if (error || READ_ONCE(async_error)) + return; - put_device(dev); + dpm_async_suspend_superior(dev, async_suspend); } -static int device_suspend(struct device *dev) +static void async_suspend(void *data, async_cookie_t cookie) { - if (dpm_async_fn(dev, async_suspend)) - return 0; + struct device *dev = data; - return __device_suspend(dev, pm_transition, false); + device_suspend(dev, pm_transition, true); + put_device(dev); } /** @@ -1736,7 +2007,8 @@ static int device_suspend(struct device *dev) int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); - int error = 0; + struct device *dev; + int error; trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); @@ -1744,43 +2016,103 @@ int dpm_suspend(pm_message_t state) devfreq_suspend(); cpufreq_suspend(); - mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; + + mutex_lock(&dpm_list_mtx); + + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_prepared_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend); + } + while (!list_empty(&dpm_prepared_list)) { - struct device *dev = to_device(dpm_prepared_list.prev); + dev = to_device(dpm_prepared_list.prev); + + list_move(&dev->power.entry, &dpm_suspended_list); + + if (dpm_async_fn(dev, async_suspend)) + continue; get_device(dev); + mutex_unlock(&dpm_list_mtx); - error = device_suspend(dev); + device_suspend(dev, state, false); + + put_device(dev); mutex_lock(&dpm_list_mtx); - if (error) { - pm_dev_err(dev, state, "", error); - dpm_save_failed_dev(dev_name(dev)); - put_device(dev); + + if (READ_ONCE(async_error)) { + dpm_async_suspend_complete_all(&dpm_prepared_list); + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice_init(&dpm_prepared_list, &dpm_suspended_list); break; } - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_suspended_list); - put_device(dev); - if (async_error) - break; } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); - if (!error) - error = async_error; - if (error) { - suspend_stats.failed_suspend++; + + error = READ_ONCE(async_error); + if (error) dpm_save_failed_step(SUSPEND_SUSPEND); - } + dpm_show_time(starttime, state, error, NULL); trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; } +static bool device_prepare_smart_suspend(struct device *dev) +{ + struct device_link *link; + bool ret = true; + int idx; + + /* + * The "smart suspend" feature is enabled for devices whose drivers ask + * for it and for devices without PM callbacks. + * + * However, if "smart suspend" is not enabled for the device's parent + * or any of its suppliers that take runtime PM into account, it cannot + * be enabled for the device either. 
+ */ + if (!dev->power.no_pm_callbacks && + !dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + return false; + + if (dev->parent && !dev_pm_smart_suspend(dev->parent) && + !dev->parent->power.ignore_children && !pm_runtime_blocked(dev->parent)) + return false; + + idx = device_links_read_lock(); + + dev_for_each_link_to_supplier(link, dev) { + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) + continue; + + if (!dev_pm_smart_suspend(link->supplier) && + !pm_runtime_blocked(link->supplier)) { + ret = false; + break; + } + } + + device_links_read_unlock(idx); + + return ret; +} + /** * device_prepare - Prepare a device for system power transition. * @dev: Device to handle. @@ -1792,11 +2124,9 @@ int dpm_suspend(pm_message_t state) static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; + bool smart_suspend; int ret = 0; - if (dev->power.syscore) - return 0; - /* * If a device's parent goes into runtime suspend at the wrong time, * it won't be possible to resume the device. To prevent this we @@ -1804,10 +2134,21 @@ static int device_prepare(struct device *dev, pm_message_t state) * it again during the complete phase. */ pm_runtime_get_noresume(dev); + /* + * If runtime PM is disabled for the device at this point and it has + * never been enabled so far, it should not be enabled until this system + * suspend-resume cycle is complete, so prepare to trigger a warning on + * subsequent attempts to enable it. + */ + smart_suspend = !pm_runtime_block_if_disabled(dev); + + if (dev->power.syscore) + return 0; device_lock(dev); dev->power.wakeup_path = false; + dev->power.out_band_wakeup = false; if (dev->power.no_pm_callbacks) goto unlock; @@ -1831,10 +2172,17 @@ unlock: device_unlock(dev); if (ret < 0) { - suspend_report_result(callback, ret); + suspend_report_result(dev, callback, ret); pm_runtime_put(dev); return ret; } + /* Do not enable "smart suspend" for devices with disabled runtime PM. */ + if (smart_suspend) + smart_suspend = device_prepare_smart_suspend(dev); + + spin_lock_irq(&dev->power.lock); + + dev->power.smart_suspend = smart_suspend; /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is @@ -1842,11 +2190,12 @@ unlock: * will do the same thing with all of its descendants". This only * applies to suspend transitions, however. 
*/ - spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); + spin_unlock_irq(&dev->power.lock); + return 0; } @@ -1861,7 +2210,6 @@ int dpm_prepare(pm_message_t state) int error = 0; trace_suspend_resume(TPS("dpm_prepare"), state.event, true); - might_sleep(); /* * Give a chance for the known devices to complete their probes, before @@ -1878,10 +2226,11 @@ int dpm_prepare(pm_message_t state) device_block_probing(); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_list)) { + while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); + mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); @@ -1889,21 +2238,23 @@ int dpm_prepare(pm_message_t state) trace_device_pm_callback_end(dev, error); mutex_lock(&dpm_list_mtx); - if (error) { - if (error == -EAGAIN) { - put_device(dev); - error = 0; - continue; - } + + if (!error) { + dev->power.is_prepared = true; + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &dpm_prepared_list); + } else if (error == -EAGAIN) { + error = 0; + } else { dev_info(dev, "not prepared for power transition: code %d\n", error); - put_device(dev); - break; } - dev->power.is_prepared = true; - if (!list_empty(&dev->power.entry)) - list_move_tail(&dev->power.entry, &dpm_prepared_list); + + mutex_unlock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); trace_suspend_resume(TPS("dpm_prepare"), state.event, false); @@ -1923,20 +2274,22 @@ int dpm_suspend_start(pm_message_t state) int error; error = dpm_prepare(state); - if (error) { - suspend_stats.failed_prepare++; + if (error) dpm_save_failed_step(SUSPEND_PREPARE); - } else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } + dpm_show_time(starttime, state, error, "start"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_start); -void __suspend_report_result(const char *function, void *fn, int ret) +void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret) { if (ret) - pr_err("%s(): %pS returns %d\n", function, fn, ret); + dev_err(dev, "%s(): %ps returns %d\n", function, fn, ret); } EXPORT_SYMBOL_GPL(__suspend_report_result); @@ -1991,7 +2344,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops) void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2000,11 +2355,10 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) { - return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && - pm_runtime_status_suspended(dev); + return dev_pm_smart_suspend(dev) && pm_runtime_status_suspended(dev); } diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 54292cdd7808..922ed457db19 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -25,8 +25,11 @@ extern u64 pm_runtime_active_time(struct device *dev); #define 
WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) #define WAKE_IRQ_DEDICATED_MANAGED BIT(1) +#define WAKE_IRQ_DEDICATED_REVERSE BIT(2) #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ - WAKE_IRQ_DEDICATED_MANAGED) + WAKE_IRQ_DEDICATED_MANAGED | \ + WAKE_IRQ_DEDICATED_REVERSE) +#define WAKE_IRQ_DEDICATED_ENABLED BIT(3) struct wake_irq { struct device *dev; @@ -39,7 +42,8 @@ extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); extern void dev_pm_enable_wake_irq_check(struct device *dev, bool can_change_status); -extern void dev_pm_disable_wake_irq_check(struct device *dev); +extern void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable); +extern void dev_pm_enable_wake_irq_complete(struct device *dev); #ifdef CONFIG_PM_SLEEP diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 8e93167f1783..ff393cba7649 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -137,6 +137,7 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) return ret; } +EXPORT_SYMBOL_GPL(dev_pm_qos_read_value); /** * apply_constraint - Add/modify/remove device PM QoS request. @@ -201,7 +202,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) if (!qos) return -ENOMEM; - n = kzalloc(3 * sizeof(*n), GFP_KERNEL); + n = kcalloc(3, sizeof(*n), GFP_KERNEL); if (!n) { kfree(qos); return -ENOMEM; diff --git a/drivers/base/power/runtime-test.c b/drivers/base/power/runtime-test.c new file mode 100644 index 000000000000..1535ad2b0264 --- /dev/null +++ b/drivers/base/power/runtime-test.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2025 Google, Inc. + */ + +#include <linux/cleanup.h> +#include <linux/pm_runtime.h> +#include <kunit/device.h> +#include <kunit/test.h> + +#define DEVICE_NAME "pm_runtime_test_device" + +static void pm_runtime_depth_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_get_sync(dev)); /* "already active" */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +/* Test pm_runtime_put() and friends when already suspended. */ +static void pm_runtime_already_suspended_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + pm_runtime_get_noresume(dev); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, 1, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_request_autosuspend(dev)); + + pm_runtime_get_noresume(dev); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev)); + + pm_runtime_get_noresume(dev); + pm_runtime_put_autosuspend(dev); + + /* Grab 2 refcounts */ + pm_runtime_get_noresume(dev); + pm_runtime_get_noresume(dev); + /* The first put() sees usage_count 1 */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync_autosuspend(dev)); + /* The second put() sees usage_count 0 but tells us "already suspended". 
*/ + KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev)); + + /* Should have remained suspended the whole time. */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +static void pm_runtime_idle_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + pm_runtime_put_noidle(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev)); +} + +static void pm_runtime_disabled_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* Never called pm_runtime_enable() */ + KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev)); + + /* "disabled" is treated as "active" */ + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_FALSE(test, pm_runtime_suspended(dev)); + + /* + * Note: these "fail", but they still acquire/release refcounts, so + * keep them balanced. + */ + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev)); + pm_runtime_put(dev); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume_and_get(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_autosuspend(dev)); + + /* Still disabled */ + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev)); +} + +static void pm_runtime_error_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + /* Fake a .runtime_resume() error */ + dev->power.runtime_error = -EIO; + + /* + * Note: these "fail", but they still acquire/release refcounts, so + * keep them balanced. 
+ */ + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + pm_runtime_put(dev); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync_autosuspend(dev)); + + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume_and_get(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_autosuspend(dev)); + + /* Error is still pending */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + KUNIT_EXPECT_EQ(test, -EIO, dev->power.runtime_error); + /* Clear error */ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_suspended(dev)); + KUNIT_EXPECT_EQ(test, 0, dev->power.runtime_error); + /* Still suspended */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get(dev)); + pm_runtime_barrier(dev); + pm_runtime_put(dev); + pm_runtime_suspend(dev); /* flush the put(), to suspend */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev)); + pm_runtime_put_autosuspend(dev); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_resume_and_get(dev)); + + /* + * The following should all return -EAGAIN (usage is non-zero) or 1 + * (already resumed). + */ + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_request_resume(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_autosuspend(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_suspend(dev)); + KUNIT_EXPECT_EQ(test, 1, pm_runtime_resume(dev)); + KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_autosuspend(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev)); + + /* Suspended again */ + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +/* + * Explore a typical probe() sequence in which a device marks itself powered, + * but doesn't hold any runtime PM reference, so it suspends as soon as it goes + * idle. + */ +static void pm_runtime_probe_active_test(struct kunit *test) +{ + struct device *dev = kunit_device_register(test, DEVICE_NAME); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + KUNIT_EXPECT_TRUE(test, pm_runtime_status_suspended(dev)); + + KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_active(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + pm_runtime_enable(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + /* Nothing to flush. We stay active. */ + pm_runtime_barrier(dev); + KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev)); + + /* Ask for idle? Now we suspend. 
*/ + KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev)); + KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev)); +} + +static struct kunit_case pm_runtime_test_cases[] = { + KUNIT_CASE(pm_runtime_depth_test), + KUNIT_CASE(pm_runtime_already_suspended_test), + KUNIT_CASE(pm_runtime_idle_test), + KUNIT_CASE(pm_runtime_disabled_test), + KUNIT_CASE(pm_runtime_error_test), + KUNIT_CASE(pm_runtime_probe_active_test), + {} +}; + +static struct kunit_suite pm_runtime_test_suite = { + .name = "pm_runtime_test_cases", + .test_cases = pm_runtime_test_cases, +}; + +kunit_test_suite(pm_runtime_test_suite); +MODULE_DESCRIPTION("Runtime power management unit test suite"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8a66eaf731e4..84676cc24221 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> +#include <linux/rculist.h> #include <trace/events/rpm.h> #include "../base.h" @@ -18,10 +19,24 @@ typedef int (*pm_callback_t)(struct device *); +static inline pm_callback_t get_callback_ptr(const void *start, size_t offset) +{ + return *(pm_callback_t *)(start + offset); +} + +static pm_callback_t __rpm_get_driver_callback(struct device *dev, + size_t cb_offset) +{ + if (dev->driver && dev->driver->pm) + return get_callback_ptr(dev->driver->pm, cb_offset); + + return NULL; +} + static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) { - pm_callback_t cb; const struct dev_pm_ops *ops; + pm_callback_t cb = NULL; if (dev->pm_domain) ops = &dev->pm_domain->ops; @@ -35,12 +50,10 @@ static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) ops = NULL; if (ops) - cb = *(pm_callback_t *)((void *)ops + cb_offset); - else - cb = NULL; + cb = get_callback_ptr(ops, cb_offset); - if (!cb && dev->driver && dev->driver->pm) - cb = *(pm_callback_t *)((void *)dev->driver->pm + cb_offset); + if (!cb) + cb = __rpm_get_driver_callback(dev, cb_offset); return cb; } @@ -77,7 +90,7 @@ static void update_pm_runtime_accounting(struct device *dev) /* * Because ktime_get_mono_fast_ns() is not monotonic during * timekeeping updates, ensure that 'now' is after the last saved - * timesptamp. + * timestamp. */ if (now < last) return; @@ -93,6 +106,7 @@ static void update_pm_runtime_accounting(struct device *dev) static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); + trace_rpm_status(dev, status); dev->power.runtime_status = status; } @@ -203,7 +217,7 @@ static int dev_memalloc_noio(struct device *dev, void *data) * resume/suspend callback of any one of its ancestors(or the * block device itself), the deadlock may be triggered inside the * memory allocation since it might not complete until the block - * device becomes active and the involed page I/O finishes. The + * device becomes active and the involved page I/O finishes. The * situation is pointed out first by Alan Stern. Network device * are involved in iSCSI kind of situation. * @@ -243,8 +257,7 @@ void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) * flag was set by any one of the descendants. 
*/ if (!dev || (!enable && - device_for_each_child(dev, NULL, - dev_memalloc_noio))) + device_for_each_child(dev, NULL, dev_memalloc_noio))) break; } mutex_unlock(&dev_hotplug_mutex); @@ -263,17 +276,15 @@ static int rpm_check_suspend_allowed(struct device *dev) retval = -EINVAL; else if (dev->power.disable_depth > 0) retval = -EACCES; - else if (atomic_read(&dev->power.usage_count) > 0) + else if (atomic_read(&dev->power.usage_count)) retval = -EAGAIN; - else if (!dev->power.ignore_children && - atomic_read(&dev->power.child_count)) + else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count)) retval = -EBUSY; /* Pending resume requests take precedence over suspends. */ - else if ((dev->power.deferred_resume - && dev->power.runtime_status == RPM_SUSPENDING) - || (dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME)) + else if ((dev->power.deferred_resume && + dev->power.runtime_status == RPM_SUSPENDING) || + (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; else if (__dev_pm_qos_resume_latency(dev) == 0) retval = -EPERM; @@ -291,7 +302,7 @@ static int rpm_get_suppliers(struct device *dev) device_links_read_lock_held()) { int retval; - if (!(link->flags & DL_FLAG_PM_RUNTIME)) + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) continue; retval = pm_runtime_get_sync(link->supplier); @@ -305,16 +316,34 @@ static int rpm_get_suppliers(struct device *dev) return 0; } +/** + * pm_runtime_release_supplier - Drop references to device link's supplier. + * @link: Target device link. + * + * Drop all runtime PM references associated with @link to its supplier device. + */ +void pm_runtime_release_supplier(struct device_link *link) +{ + struct device *supplier = link->supplier; + + /* + * The additional power.usage_count check is a safety net in case + * the rpm_active refcount becomes saturated, in which case + * refcount_dec_not_one() would return true forever, but it is not + * strictly necessary. + */ + while (refcount_dec_not_one(&link->rpm_active) && + atomic_read(&supplier->power.usage_count) > 0) + pm_runtime_put_noidle(supplier); +} + static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) { - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put_noidle(link->supplier); - + pm_runtime_release_supplier(link); if (try_to_suspend) pm_request_idle(link->supplier); } @@ -386,9 +415,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) * * Do that if resume fails too. */ - if (use_links - && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval))) { + if (use_links && + ((dev->power.runtime_status == RPM_SUSPENDING && !retval) || + (dev->power.runtime_status == RPM_RESUMING && retval))) { idx = device_links_read_lock(); __rpm_put_suppliers(dev, false); @@ -404,6 +433,49 @@ fail: } /** + * rpm_callback - Run a given runtime PM callback for a given device. + * @cb: Runtime PM callback to run. + * @dev: Device to run the callback for. + */ +static int rpm_callback(int (*cb)(struct device *), struct device *dev) +{ + int retval; + + if (dev->power.memalloc_noio) { + unsigned int noio_flag; + + /* + * Deadlock might be caused if memory allocation with + * GFP_KERNEL happens inside runtime_suspend and + * runtime_resume callbacks of one block device's + * ancestor or the block device itself. 
Network + * device might be thought as part of iSCSI block + * device, so network device and its ancestor should + * be marked as memalloc_noio too. + */ + noio_flag = memalloc_noio_save(); + retval = __rpm_callback(cb, dev); + memalloc_noio_restore(noio_flag); + } else { + retval = __rpm_callback(cb, dev); + } + + /* + * Since -EACCES means that runtime PM is disabled for the given device, + * it should not be returned by runtime PM callbacks. If it is returned + * nevertheless, assume it to be a transient error and convert it to + * -EAGAIN. + */ + if (retval == -EACCES) + retval = -EAGAIN; + + if (retval != -EAGAIN && retval != -EBUSY) + dev->power.runtime_error = retval; + + return retval; +} + +/** * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. * @rpmflags: Flag bits. @@ -421,11 +493,14 @@ static int rpm_idle(struct device *dev, int rpmflags) int (*callback)(struct device *); int retval; - trace_rpm_idle_rcuidle(dev, rpmflags); + trace_rpm_idle(dev, rpmflags); retval = rpm_check_suspend_allowed(dev); if (retval < 0) ; /* Conditions are wrong. */ + else if ((rpmflags & RPM_GET_PUT) && retval == 1) + ; /* put() is allowed in RPM_SUSPENDED */ + /* Idle notifications are allowed only in the RPM_ACTIVE state. */ else if (dev->power.runtime_status != RPM_ACTIVE) retval = -EAGAIN; @@ -441,6 +516,7 @@ static int rpm_idle(struct device *dev, int rpmflags) /* Act as though RPM_NOWAIT is always set. */ else if (dev->power.idle_notification) retval = -EINPROGRESS; + if (retval) goto out; @@ -460,55 +536,33 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } - trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0); + trace_rpm_return_int(dev, _THIS_IP_, 0); return 0; } dev->power.idle_notification = true; - retval = __rpm_callback(callback, dev); + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); + + retval = callback(dev); + + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); dev->power.idle_notification = false; wake_up_all(&dev->power.wait_queue); out: - trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO); } /** - * rpm_callback - Run a given runtime PM callback for a given device. - * @cb: Runtime PM callback to run. - * @dev: Device to run the callback for. - */ -static int rpm_callback(int (*cb)(struct device *), struct device *dev) -{ - int retval; - - if (dev->power.memalloc_noio) { - unsigned int noio_flag; - - /* - * Deadlock might be caused if memory allocation with - * GFP_KERNEL happens inside runtime_suspend and - * runtime_resume callbacks of one block device's - * ancestor or the block device itself. Network - * device might be thought as part of iSCSI block - * device, so network device and its ancestor should - * be marked as memalloc_noio too. - */ - noio_flag = memalloc_noio_save(); - retval = __rpm_callback(cb, dev); - memalloc_noio_restore(noio_flag); - } else { - retval = __rpm_callback(cb, dev); - } - - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; -} - -/** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. * @rpmflags: Flag bits. 
@@ -536,7 +590,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) struct device *parent = NULL; int retval; - trace_rpm_suspend_rcuidle(dev, rpmflags); + trace_rpm_suspend(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); @@ -546,12 +600,12 @@ static int rpm_suspend(struct device *dev, int rpmflags) /* Synchronous suspends are not allowed in the RPM_RESUMING state. */ if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC)) retval = -EAGAIN; + if (retval) goto out; /* If the autosuspend_delay time hasn't expired yet, reschedule. */ - if ((rpmflags & RPM_AUTO) - && dev->power.runtime_status != RPM_SUSPENDING) { + if ((rpmflags & RPM_AUTO) && dev->power.runtime_status != RPM_SUSPENDING) { u64 expires = pm_runtime_autosuspend_expiration(dev); if (expires != 0) { @@ -566,7 +620,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) * rest. */ if (!(dev->power.timer_expires && - dev->power.timer_expires <= expires)) { + dev->power.timer_expires <= expires)) { /* * We add a slack of 25% to gather wakeups * without sacrificing the granularity. @@ -576,9 +630,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.timer_expires = expires; hrtimer_start_range_ns(&dev->power.suspend_timer, - ns_to_ktime(expires), - slack, - HRTIMER_MODE_ABS); + ns_to_ktime(expires), + slack, + HRTIMER_MODE_ABS); } dev->power.timer_autosuspends = 1; goto out; @@ -645,6 +699,8 @@ static int rpm_suspend(struct device *dev, int rpmflags) if (retval) goto fail; + dev_pm_enable_wake_irq_complete(dev); + no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -685,31 +741,28 @@ static int rpm_suspend(struct device *dev, int rpmflags) } out: - trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; fail: - dev_pm_disable_wake_irq_check(dev); + dev_pm_disable_wake_irq_check(dev, true); __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + /* + * On transient errors, if the callback routine failed an autosuspend, + * and if the last_busy time has been updated so that there is a new + * autosuspend expiration time, automatically reschedule another + * autosuspend. + */ + if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + + pm_runtime_cancel_pending(dev); - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. 
- */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } goto out; } @@ -737,16 +790,20 @@ static int rpm_resume(struct device *dev, int rpmflags) struct device *parent = NULL; int retval = 0; - trace_rpm_resume_rcuidle(dev, rpmflags); + trace_rpm_resume(dev, rpmflags); repeat: - if (dev->power.runtime_error) + if (dev->power.runtime_error) { retval = -EINVAL; - else if (dev->power.disable_depth == 1 && dev->power.is_suspended - && dev->power.runtime_status == RPM_ACTIVE) - retval = 1; - else if (dev->power.disable_depth > 0) - retval = -EACCES; + } else if (dev->power.disable_depth > 0) { + if (dev->power.runtime_status == RPM_ACTIVE && + dev->power.last_status == RPM_ACTIVE) + retval = 1; + else if (rpmflags & RPM_TRANSPARENT) + goto out; + else + retval = -EACCES; + } if (retval) goto out; @@ -765,15 +822,18 @@ static int rpm_resume(struct device *dev, int rpmflags) goto out; } - if (dev->power.runtime_status == RPM_RESUMING - || dev->power.runtime_status == RPM_SUSPENDING) { + if (dev->power.runtime_status == RPM_RESUMING || + dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { - if (dev->power.runtime_status == RPM_SUSPENDING) + if (dev->power.runtime_status == RPM_SUSPENDING) { dev->power.deferred_resume = true; - else + if (rpmflags & RPM_NOWAIT) + retval = -EINPROGRESS; + } else { retval = -EINPROGRESS; + } goto out; } @@ -790,8 +850,8 @@ static int rpm_resume(struct device *dev, int rpmflags) for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, TASK_UNINTERRUPTIBLE); - if (dev->power.runtime_status != RPM_RESUMING - && dev->power.runtime_status != RPM_SUSPENDING) + if (dev->power.runtime_status != RPM_RESUMING && + dev->power.runtime_status != RPM_SUSPENDING) break; spin_unlock_irq(&dev->power.lock); @@ -811,9 +871,9 @@ static int rpm_resume(struct device *dev, int rpmflags) */ if (dev->power.no_callbacks && !parent && dev->parent) { spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING); - if (dev->parent->power.disable_depth > 0 - || dev->parent->power.ignore_children - || dev->parent->power.runtime_status == RPM_ACTIVE) { + if (dev->parent->power.disable_depth > 0 || + dev->parent->power.ignore_children || + dev->parent->power.runtime_status == RPM_ACTIVE) { atomic_inc(&dev->parent->power.child_count); spin_unlock(&dev->parent->power.lock); retval = 1; @@ -842,6 +902,7 @@ static int rpm_resume(struct device *dev, int rpmflags) parent = dev->parent; if (dev->power.irq_safe) goto skip_parent; + spin_unlock(&dev->power.lock); pm_runtime_get_noresume(parent); @@ -851,8 +912,8 @@ static int rpm_resume(struct device *dev, int rpmflags) * Resume the parent if it has runtime PM enabled and not been * set to ignore its children. 
*/ - if (!parent->power.disable_depth - && !parent->power.ignore_children) { + if (!parent->power.disable_depth && + !parent->power.ignore_children) { rpm_resume(parent, 0); if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; @@ -862,6 +923,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock(&dev->power.lock); if (retval) goto out; + goto repeat; } skip_parent: @@ -873,7 +935,7 @@ static int rpm_resume(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_resume); - dev_pm_disable_wake_irq_check(dev); + dev_pm_disable_wake_irq_check(dev, false); retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); @@ -900,7 +962,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock_irq(&dev->power.lock); } - trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); + trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; } @@ -966,7 +1028,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) * If 'expires' is after the current time, we've been called * too early. */ - if (expires > 0 && expires < ktime_get_mono_fast_ns()) { + if (expires > 0 && expires <= ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); @@ -1014,13 +1076,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) } EXPORT_SYMBOL_GPL(pm_schedule_suspend); +static int rpm_drop_usage_count(struct device *dev) +{ + int ret; + + ret = atomic_sub_return(1, &dev->power.usage_count); + if (ret >= 0) + return ret; + + /* + * Because rpm_resume() does not check the usage counter, it will resume + * the device even if the usage counter is 0 or negative, so it is + * sufficient to increment the usage counter here to reverse the change + * made above. + */ + atomic_inc(&dev->power.usage_count); + dev_warn(dev, "Runtime PM usage count underflow!\n"); + return -EINVAL; +} + /** * __pm_runtime_idle - Entry point for runtime idle operations. * @dev: Device to send idle notification for. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and - * return immediately if it is larger than zero. Then carry out an idle + * return immediately if it is larger than zero (if it becomes negative, log a + * warning, increment it, and return an error). Then carry out an idle * notification, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, @@ -1032,8 +1114,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags) int retval; if (rpmflags & RPM_GET_PUT) { - if (!atomic_dec_and_test(&dev->power.usage_count)) { - trace_rpm_usage_rcuidle(dev, rpmflags); + retval = rpm_drop_usage_count(dev); + if (retval < 0) { + return retval; + } else if (retval > 0) { + trace_rpm_usage(dev, rpmflags); return 0; } } @@ -1054,7 +1139,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle); * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and - * return immediately if it is larger than zero. Then carry out a suspend, + * return immediately if it is larger than zero (if it becomes negative, log a + * warning, increment it, and return an error). Then carry out a suspend, * either synchronous or asynchronous. 
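The usage-count rules described in these entry points are easiest to see from the driver side. A minimal sketch of the balanced get/put pattern, with a hypothetical foo_xfer() I/O routine standing in for real driver code:

#include <linux/pm_runtime.h>

/* Hypothetical transfer routine: take a usage-count reference, do the
 * work, then drop the reference and let autosuspend run later. */
static int foo_xfer(struct device *dev)
{
        int ret;

        ret = pm_runtime_resume_and_get(dev);  /* counts up, resumes if needed */
        if (ret < 0)
                return ret;

        /* ... program the hardware here ... */

        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);       /* counts down; may schedule suspend */
        return 0;
}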
* * This routine may be called in atomic context if the RPM_ASYNC flag is set, @@ -1066,8 +1152,11 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags) int retval; if (rpmflags & RPM_GET_PUT) { - if (!atomic_dec_and_test(&dev->power.usage_count)) { - trace_rpm_usage_rcuidle(dev, rpmflags); + retval = rpm_drop_usage_count(dev); + if (retval < 0) { + return retval; + } else if (retval > 0) { + trace_rpm_usage(dev, rpmflags); return 0; } } @@ -1113,16 +1202,18 @@ int __pm_runtime_resume(struct device *dev, int rpmflags) EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** - * pm_runtime_get_if_active - Conditionally bump up device usage counter. + * pm_runtime_get_conditional - Conditionally bump up device usage counter. * @dev: Device to handle. * @ign_usage_count: Whether or not to look at the current usage counter value. * * Return -EINVAL if runtime PM is disabled for @dev. * - * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either - * @ign_usage_count is %true or the runtime PM usage counter of @dev is not - * zero, increment the usage counter of @dev and return 1. Otherwise, return 0 - * without changing the usage counter. + * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count + * is set, or (2) @dev is not ignoring children and its active child count is + * nonzero, or (3) the runtime PM usage counter of @dev is not zero, increment + * the usage counter of @dev and return 1. + * + * Otherwise, return 0 without changing the usage counter. * * If @ign_usage_count is %true, this function can be used to prevent suspending * the device when its runtime PM status is %RPM_ACTIVE. @@ -1134,7 +1225,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume); * The caller is responsible for decrementing the runtime PM usage counter of * @dev after this function has returned a positive value for it. */ -int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) +static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count) { unsigned long flags; int retval; @@ -1144,20 +1235,58 @@ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) retval = -EINVAL; } else if (dev->power.runtime_status != RPM_ACTIVE) { retval = 0; - } else if (ign_usage_count) { + } else if (ign_usage_count || (!dev->power.ignore_children && + atomic_read(&dev->power.child_count) > 0)) { retval = 1; atomic_inc(&dev->power.usage_count); } else { retval = atomic_inc_not_zero(&dev->power.usage_count); } - trace_rpm_usage_rcuidle(dev, 0); + trace_rpm_usage(dev, 0); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } + +/** + * pm_runtime_get_if_active - Bump up runtime PM usage counter if the device is + * in active state + * @dev: Target device. + * + * Increment the runtime PM usage counter of @dev if its runtime PM status is + * %RPM_ACTIVE, in which case it returns 1. If the device is in a different + * state, 0 is returned. -EINVAL is returned if runtime PM is disabled for the + * device, in which case also the usage_count will remain unmodified. + */ +int pm_runtime_get_if_active(struct device *dev) +{ + return pm_runtime_get_conditional(dev, true); +} EXPORT_SYMBOL_GPL(pm_runtime_get_if_active); /** + * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. + * @dev: Target device. + * + * Increment the runtime PM usage counter of @dev if its runtime PM status is + * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not + * ignoring children and its active child count is nonzero. 
1 is returned in + * this case. + * + * If @dev is in a different state or it is not in use (that is, its usage + * counter is 0, or it is ignoring children, or its active child count is 0), + * 0 is returned. + * + * -EINVAL is returned if runtime PM is disabled for the device, in which case + * also the usage counter of @dev is not updated. + */ +int pm_runtime_get_if_in_use(struct device *dev) +{ + return pm_runtime_get_conditional(dev, false); +} +EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use); + +/** * __pm_runtime_set_status - Set runtime PM status of a device. * @dev: Device to handle. * @status: New runtime PM status of the device. @@ -1185,12 +1314,13 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) { struct device *parent = dev->parent; bool notify_parent = false; + unsigned long flags; int error = 0; if (status != RPM_ACTIVE && status != RPM_SUSPENDED) return -EINVAL; - spin_lock_irq(&dev->power.lock); + spin_lock_irqsave(&dev->power.lock, flags); /* * Prevent PM-runtime from being enabled for the device or return an @@ -1201,7 +1331,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) else error = -EAGAIN; - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); if (error) return error; @@ -1222,7 +1352,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) device_links_read_unlock(idx); } - spin_lock_irq(&dev->power.lock); + spin_lock_irqsave(&dev->power.lock, flags); if (dev->power.runtime_status == status || !parent) goto out_set; @@ -1238,9 +1368,9 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) * not active, has runtime PM enabled and the * 'power.ignore_children' flag unset. */ - if (!parent->power.disable_depth - && !parent->power.ignore_children - && parent->power.runtime_status != RPM_ACTIVE) { + if (!parent->power.disable_depth && + !parent->power.ignore_children && + parent->power.runtime_status != RPM_ACTIVE) { dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n", dev_name(dev), dev_name(parent)); @@ -1263,7 +1393,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) dev->power.runtime_error = 0; out: - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); if (notify_parent) pm_request_idle(parent); @@ -1305,9 +1435,9 @@ static void __pm_runtime_barrier(struct device *dev) dev->power.request_pending = false; } - if (dev->power.runtime_status == RPM_SUSPENDING - || dev->power.runtime_status == RPM_RESUMING - || dev->power.idle_notification) { + if (dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING || + dev->power.idle_notification) { DEFINE_WAIT(wait); /* Suspend, wake-up or idle notification in progress. */ @@ -1337,47 +1467,48 @@ static void __pm_runtime_barrier(struct device *dev) * Next, make sure that all pending requests for the device have been flushed * from pm_wq and wait for all runtime PM operations involving the device in * progress to complete. 
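A typical consumer of the conditional helpers documented above is code that must not wake the device up just to discover there is nothing to do, for instance an interrupt handler. A hedged sketch with hypothetical foo_* names:

#include <linux/interrupt.h>
#include <linux/pm_runtime.h>

/* Hypothetical ISR: only touch registers if the device is demonstrably
 * in use; a non-positive return means suspended or runtime PM disabled. */
static irqreturn_t foo_isr(int irq, void *data)
{
        struct device *dev = data;

        if (pm_runtime_get_if_in_use(dev) <= 0)
                return IRQ_NONE;

        /* ... acknowledge and handle the hardware event ... */

        pm_runtime_put_autosuspend(dev);
        return IRQ_HANDLED;
}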
- * - * Return value: - * 1, if there was a resume request pending and the device had to be woken up, - * 0, otherwise */ -int pm_runtime_barrier(struct device *dev) +void pm_runtime_barrier(struct device *dev) { - int retval = 0; - pm_runtime_get_noresume(dev); spin_lock_irq(&dev->power.lock); if (dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME) { + && dev->power.request == RPM_REQ_RESUME) rpm_resume(dev, 0); - retval = 1; - } __pm_runtime_barrier(dev); spin_unlock_irq(&dev->power.lock); pm_runtime_put_noidle(dev); - - return retval; } EXPORT_SYMBOL_GPL(pm_runtime_barrier); -/** - * __pm_runtime_disable - Disable runtime PM of a device. - * @dev: Device to handle. - * @check_resume: If set, check if there's a resume request for the device. - * - * Increment power.disable_depth for the device and if it was zero previously, - * cancel all pending runtime PM requests for the device and wait for all - * operations in progress to complete. The device can be either active or - * suspended after its runtime PM has been disabled. - * - * If @check_resume is set and there's a resume request pending when - * __pm_runtime_disable() is called and power.disable_depth is zero, the - * function will wake up the device before disabling its runtime PM. - */ +bool pm_runtime_block_if_disabled(struct device *dev) +{ + bool ret; + + spin_lock_irq(&dev->power.lock); + + ret = !pm_runtime_enabled(dev); + if (ret && dev->power.last_status == RPM_INVALID) + dev->power.last_status = RPM_BLOCKED; + + spin_unlock_irq(&dev->power.lock); + + return ret; +} + +void pm_runtime_unblock(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.last_status == RPM_BLOCKED) + dev->power.last_status = RPM_INVALID; + + spin_unlock_irq(&dev->power.lock); +} + void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); @@ -1392,8 +1523,8 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) * means there probably is some I/O to process and disabling runtime PM * shouldn't prevent the device from processing the I/O. */ - if (check_resume && dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME) { + if (check_resume && dev->power.request_pending && + dev->power.request == RPM_REQ_RESUME) { /* * Prevent suspends and idle notifications from being carried * out after we have woken up the device. @@ -1408,8 +1539,10 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) /* Update time accounting before disabling PM-runtime. 
*/ update_pm_runtime_accounting(dev); - if (!dev->power.disable_depth++) + if (!dev->power.disable_depth++) { __pm_runtime_barrier(dev); + dev->power.last_status = dev->power.runtime_status; + } out: spin_unlock_irq(&dev->power.lock); @@ -1426,34 +1559,107 @@ void pm_runtime_enable(struct device *dev) spin_lock_irqsave(&dev->power.lock, flags); - if (dev->power.disable_depth > 0) { - dev->power.disable_depth--; - - /* About to enable runtime pm, set accounting_timestamp to now */ - if (!dev->power.disable_depth) - dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); - } else { + if (!dev->power.disable_depth) { dev_warn(dev, "Unbalanced %s!\n", __func__); + goto out; + } + + if (--dev->power.disable_depth > 0) + goto out; + + if (dev->power.last_status == RPM_BLOCKED) { + dev_warn(dev, "Attempt to enable runtime PM when it is blocked\n"); + dump_stack(); } + dev->power.last_status = RPM_INVALID; + dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); - WARN(!dev->power.disable_depth && - dev->power.runtime_status == RPM_SUSPENDED && - !dev->power.ignore_children && - atomic_read(&dev->power.child_count) > 0, - "Enabling runtime PM for inactive device (%s) with active children\n", - dev_name(dev)); + if (dev->power.runtime_status == RPM_SUSPENDED && + !dev->power.ignore_children && + atomic_read(&dev->power.child_count) > 0) + dev_warn(dev, "Enabling runtime PM for inactive device with active children\n"); +out: spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_runtime_enable); +static void pm_runtime_set_suspended_action(void *data) +{ + pm_runtime_set_suspended(data); +} + +/** + * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_set_active_enabled(struct device *dev) +{ + int err; + + err = pm_runtime_set_active(dev); + if (err) + return err; + + err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); + if (err) + return err; + + return devm_pm_runtime_enable(dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); + +static void pm_runtime_disable_action(void *data) +{ + pm_runtime_dont_use_autosuspend(data); + pm_runtime_disable(data); +} + +/** + * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable. + * + * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for + * you at driver exit time if needed. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_enable(struct device *dev) +{ + pm_runtime_enable(dev); + + return devm_add_action_or_reset(dev, pm_runtime_disable_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); + +static void pm_runtime_put_noidle_action(void *data) +{ + pm_runtime_put_noidle(data); +} + +/** + * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_get_noresume(struct device *dev) +{ + pm_runtime_get_noresume(dev); + + return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); + /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. * - * Increase the device's usage count and clear its power.runtime_auto flag, - * so that it cannot be suspended at run time until pm_runtime_allow() is called - * for it. 
+ * Resume @dev if already suspended and block runtime suspend of @dev in such + * a way that it can be unblocked via the /sys/devices/.../power/control + * interface, or otherwise by calling pm_runtime_allow(). + * + * Calling this function many times in a row has the same effect as calling it + * once. */ void pm_runtime_forbid(struct device *dev) { @@ -1474,19 +1680,28 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid); * pm_runtime_allow - Unblock runtime PM of a device. * @dev: Device to handle. * - * Decrease the device's usage count and set its power.runtime_auto flag. + * Unblock runtime suspend of @dev after it has been blocked by + * pm_runtime_forbid() (for instance, if it has been blocked via the + * /sys/devices/.../power/control interface), check if @dev can be + * suspended and suspend it in that case. + * + * Calling this function many times in a row has the same effect as calling it + * once. */ void pm_runtime_allow(struct device *dev) { + int ret; + spin_lock_irq(&dev->power.lock); if (dev->power.runtime_auto) goto out; dev->power.runtime_auto = true; - if (atomic_dec_and_test(&dev->power.usage_count)) + ret = rpm_drop_usage_count(dev); + if (ret == 0) rpm_idle(dev, RPM_AUTO | RPM_ASYNC); - else - trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC); + else if (ret > 0) + trace_rpm_usage(dev, RPM_AUTO | RPM_ASYNC); out: spin_unlock_irq(&dev->power.lock); @@ -1526,6 +1741,7 @@ void pm_runtime_irq_safe(struct device *dev) { if (dev->parent) pm_runtime_get_sync(dev->parent); + spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 1; spin_unlock_irq(&dev->power.lock); @@ -1555,7 +1771,7 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use) atomic_inc(&dev->power.usage_count); rpm_resume(dev, 0); } else { - trace_rpm_usage_rcuidle(dev, 0); + trace_rpm_usage(dev, 0); } } @@ -1621,6 +1837,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); void pm_runtime_init(struct device *dev) { dev->power.runtime_status = RPM_SUSPENDED; + dev->power.last_status = RPM_INVALID; dev->power.idle_notification = false; dev->power.disable_depth = 1; @@ -1635,12 +1852,12 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; - dev->power.needs_force_resume = 0; + dev->power.needs_force_resume = false; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; - hrtimer_init(&dev->power.suspend_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - dev->power.suspend_timer.function = pm_suspend_timer_fn; + hrtimer_setup(&dev->power.suspend_timer, pm_suspend_timer_fn, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); init_waitqueue_head(&dev->power.wait_queue); } @@ -1662,6 +1879,11 @@ void pm_runtime_reinit(struct device *dev) pm_runtime_put(dev->parent); } } + /* + * Clear power.needs_force_resume in case it has been set by + * pm_runtime_force_suspend() invoked from a driver remove callback. 
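The devres wrappers added above take the usual error-path bookkeeping out of probe. A minimal sketch of how they are meant to be used; foo_probe() and the 100 ms delay are illustrative assumptions:

#include <linux/device.h>
#include <linux/pm_runtime.h>

static int foo_probe(struct device *dev)
{
        pm_runtime_set_autosuspend_delay(dev, 100);     /* ms, arbitrary */
        pm_runtime_use_autosuspend(dev);

        /* Undoes both the enable and use_autosuspend automatically on unbind. */
        return devm_pm_runtime_enable(dev);
}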
+ */ + dev->power.needs_force_resume = false; } /** @@ -1685,12 +1907,10 @@ void pm_runtime_get_suppliers(struct device *dev) idx = device_links_read_lock(); - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - if (link->flags & DL_FLAG_PM_RUNTIME) { + dev_for_each_link_to_supplier(link, dev) + if (device_link_test(link, DL_FLAG_PM_RUNTIME)) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); - refcount_inc(&link->rpm_active); } device_links_read_unlock(idx); @@ -1703,8 +1923,6 @@ void pm_runtime_get_suppliers(struct device *dev) void pm_runtime_put_suppliers(struct device *dev) { struct device_link *link; - unsigned long flags; - bool put; int idx; idx = device_links_read_lock(); @@ -1713,12 +1931,7 @@ void pm_runtime_put_suppliers(struct device *dev) device_links_read_lock_held()) if (link->supplier_preactivated) { link->supplier_preactivated = false; - spin_lock_irqsave(&dev->power.lock, flags); - put = pm_runtime_status_suspended(dev) && - refcount_dec_not_one(&link->rpm_active); - spin_unlock_irqrestore(&dev->power.lock, flags); - if (put) - pm_runtime_put(link->supplier); + pm_runtime_put(link->supplier); } device_links_read_unlock(idx); @@ -1749,22 +1962,31 @@ static void pm_runtime_drop_link_count(struct device *dev) */ void pm_runtime_drop_link(struct device_link *link) { - if (!(link->flags & DL_FLAG_PM_RUNTIME)) + if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) return; pm_runtime_drop_link_count(link->consumer); - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); } -static bool pm_runtime_need_not_resume(struct device *dev) +static pm_callback_t get_callback(struct device *dev, size_t cb_offset) { - return atomic_read(&dev->power.usage_count) <= 1 && - (atomic_read(&dev->power.child_count) == 0 || - dev->power.ignore_children); + /* + * Setting power.strict_midlayer means that the middle layer + * code does not want its runtime PM callbacks to be invoked via + * pm_runtime_force_suspend() and pm_runtime_force_resume(), so + * return a direct pointer to the driver callback in that case. + */ + if (dev_pm_strict_midlayer_is_set(dev)) + return __rpm_get_driver_callback(dev, cb_offset); + + return __rpm_get_callback(dev, cb_offset); } +#define GET_CALLBACK(dev, callback) \ + get_callback(dev, offsetof(struct dev_pm_ops, callback)) + /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. @@ -1788,74 +2010,106 @@ int pm_runtime_force_suspend(struct device *dev) int ret; pm_runtime_disable(dev); - if (pm_runtime_status_suspended(dev)) + if (pm_runtime_status_suspended(dev) || dev->power.needs_force_resume) return 0; - callback = RPM_GET_CALLBACK(dev, runtime_suspend); + callback = GET_CALLBACK(dev, runtime_suspend); + dev_pm_enable_wake_irq_check(dev, true); ret = callback ? callback(dev) : 0; if (ret) goto err; + dev_pm_enable_wake_irq_complete(dev); + /* * If the device can stay in suspend after the system-wide transition * to the working state that will follow, drop the children counter of - * its parent, but set its status to RPM_SUSPENDED anyway in case this - * function will be called again for it in the meantime. + * its parent and the usage counters of its suppliers. 
Otherwise, set + * power.needs_force_resume to let pm_runtime_force_resume() know that + * the device needs to be taken care of and to prevent this function + * from handling the device again in case the device is passed to it + * once more subsequently. */ - if (pm_runtime_need_not_resume(dev)) { + if (pm_runtime_need_not_resume(dev)) pm_runtime_set_suspended(dev); - } else { - __update_runtime_status(dev, RPM_SUSPENDED); - dev->power.needs_force_resume = 1; - } + else + dev->power.needs_force_resume = true; return 0; err: + dev_pm_disable_wake_irq_check(dev, true); pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); +#ifdef CONFIG_PM_SLEEP + /** * pm_runtime_force_resume - Force a device into resume state if needed. * @dev: Device to resume. * - * Prior invoking this function we expect the user to have brought the device - * into low power state by a call to pm_runtime_force_suspend(). Here we reverse - * those actions and bring the device into full power, if it is expected to be - * used on system resume. In the other case, we defer the resume to be managed - * via runtime PM. + * This function expects that either pm_runtime_force_suspend() has put the + * device into a low-power state prior to calling it, or the device had been + * runtime-suspended before the preceding system-wide suspend transition and it + * was left in suspend during that transition. * - * Typically this function may be invoked from a system resume callback. + * The actions carried out by pm_runtime_force_suspend(), or by a runtime + * suspend in general, are reversed and the device is brought back into full + * power if it is expected to be used on system resume, which is the case when + * its needs_force_resume flag is set or when its smart_suspend flag is set and + * its runtime PM status is "active". + * + * In other cases, the resume is deferred to be managed via runtime PM. + * + * Typically, this function may be invoked from a system resume callback. */ int pm_runtime_force_resume(struct device *dev) { int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) + if (!dev->power.needs_force_resume && (!dev_pm_smart_suspend(dev) || + pm_runtime_status_suspended(dev))) goto out; - /* - * The value of the parent's children counter is correct already, so - * just update the status of the device. - */ - __update_runtime_status(dev, RPM_ACTIVE); - - callback = RPM_GET_CALLBACK(dev, runtime_resume); + callback = GET_CALLBACK(dev, runtime_resume); + dev_pm_disable_wake_irq_check(dev, false); ret = callback ? callback(dev) : 0; if (ret) { pm_runtime_set_suspended(dev); + dev_pm_enable_wake_irq_check(dev, false); goto out; } pm_runtime_mark_last_busy(dev); + out: - dev->power.needs_force_resume = 0; + /* + * The smart_suspend flag can be cleared here because it is not going + * to be necessary until the next system-wide suspend transition that + * will update it again. + */ + dev->power.smart_suspend = false; + /* + * Also clear needs_force_resume to make this function skip devices that + * have been seen by it once. 
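pm_runtime_force_suspend() and pm_runtime_force_resume() are most commonly wired up as a driver's system sleep callbacks, so that the runtime PM path is reused for system-wide transitions. A sketch under that assumption, with hypothetical foo_runtime_* callbacks and the generic dev_pm_ops initializers:

#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int foo_runtime_suspend(struct device *dev) { /* quiesce hw */ return 0; }
static int foo_runtime_resume(struct device *dev)  { /* restore hw */ return 0; }

static const struct dev_pm_ops foo_pm_ops = {
        SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
        RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
};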
+ */ + dev->power.needs_force_resume = false; + pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_resume); + +bool pm_runtime_need_not_resume(struct device *dev) +{ + return atomic_read(&dev->power.usage_count) <= 1 && + (atomic_read(&dev->power.child_count) == 0 || + dev->power.ignore_children); +} + +#endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a1474fb67db9..13b31a3adc77 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h> -#include <linux/pm_wakeup.h> #include <linux/atomic.h> #include <linux/jiffies.h> #include "power.h" @@ -509,14 +508,6 @@ static ssize_t wakeup_last_time_ms_show(struct device *dev, return sysfs_emit(buf, "%lld\n", msec); } -static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, - kgid_t kgid) -{ - if (dev->power.wakeup && dev->power.wakeup->dev) - return device_change_owner(dev->power.wakeup->dev, kuid, kgid); - return 0; -} - static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP @@ -541,6 +532,15 @@ static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ + +static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, + kgid_t kgid) +{ + if (dev->power.wakeup && dev->power.wakeup->dev) + return device_change_owner(dev->power.wakeup->dev, kuid, kgid); + return 0; +} + #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) @@ -611,15 +611,9 @@ static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { -#ifdef CONFIG_PM_ADVANCED_DEBUG -#ifdef CONFIG_PM_SLEEP +#if defined(CONFIG_PM_ADVANCED_DEBUG) && defined(CONFIG_PM_SLEEP) &dev_attr_async.attr, #endif - &dev_attr_runtime_status.attr, - &dev_attr_runtime_usage.attr, - &dev_attr_runtime_active_kids.attr, - &dev_attr_runtime_enabled.attr, -#endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static const struct attribute_group pm_attr_group = { @@ -650,13 +644,16 @@ static const struct attribute_group pm_wakeup_attr_group = { }; static struct attribute *runtime_attrs[] = { -#ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, -#endif &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, +#ifdef CONFIG_PM_ADVANCED_DEBUG + &dev_attr_runtime_usage.attr, + &dev_attr_runtime_active_kids.attr, + &dev_attr_runtime_enabled.attr, +#endif NULL, }; static const struct attribute_group pm_runtime_attr_group = { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a97f33d0c59f..d8da7195bb00 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/rtc.h> #include <linux/suspend.h> +#include <linux/init.h> #include <linux/mc146818rtc.h> @@ -119,7 +120,11 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - mc146818_get_time(&time); + if (mc146818_get_time(&time, 1000) < 0) { + pr_err("Unable to read current time from RTC\n"); + return 0; + } + pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time); val = time.tm_year; /* 100 years */ if (val > 100) @@ -165,6 +170,9 @@ void generate_pm_trace(const void *tracedata, unsigned int user) const char *file = 
*(const char **)(tracedata + 2); unsigned int user_hash_value, file_hash_value; + if (!x86_platform.legacy.rtc) + return; + user_hash_value = user % USERHASH; file_hash_value = hash_string(lineno, file, FILEHASH); set_magic_time(user_hash_value, file_hash_value, dev_hash_value); @@ -230,10 +238,8 @@ int show_trace_dev_match(char *buf, size_t size) unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH); if (hash == value) { - int len = snprintf(buf, size, "%s\n", + int len = scnprintf(buf, size, "%s\n", dev_driver_string(dev)); - if (len > size) - len = size; buf += len; ret += len; size -= len; @@ -267,6 +273,9 @@ static struct notifier_block pm_trace_nb = { static int __init early_resume_init(void) { + if (!x86_platform.legacy.rtc) + return 0; + hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; @@ -277,6 +286,9 @@ static int __init late_resume_init(void) unsigned int val = hash_value_early_read; unsigned int user, file, dev; + if (!x86_platform.legacy.rtc) + return 0; + user = val % USERHASH; val = val / USERHASH; file = val % FILEHASH; diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 3bad3266a2ad..8aa28c08b289 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -12,14 +12,11 @@ /** * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ * @dev: Device entry - * @irq: Device wake-up capable interrupt * @wirq: Wake irq specific data * - * Internal function to attach either a device IO interrupt or a - * dedicated wake-up interrupt as a wake IRQ. + * Internal function to attach a dedicated wake-up interrupt as a wake IRQ. */ -static int dev_pm_attach_wake_irq(struct device *dev, int irq, - struct wake_irq *wirq) +static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq) { unsigned long flags; @@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq) wirq->dev = dev; wirq->irq = irq; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) kfree(wirq); @@ -106,6 +103,32 @@ void dev_pm_clear_wake_irq(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); +static void devm_pm_clear_wake_irq(void *dev) +{ + dev_pm_clear_wake_irq(dev); +} + +/** + * devm_pm_set_wake_irq - device-managed variant of dev_pm_set_wake_irq + * @dev: Device entry + * @irq: Device IO interrupt + * + * + * Attach a device IO interrupt as a wake IRQ, same with dev_pm_set_wake_irq, + * but the device will be auto clear wake capability on driver detach. + */ +int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + int ret; + + ret = dev_pm_set_wake_irq(dev, irq); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, devm_pm_clear_wake_irq, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_set_wake_irq); + /** * handle_threaded_wake_irq - Handler for dedicated wake-up interrupts * @irq: Device specific dedicated wake-up interrupt @@ -145,24 +168,7 @@ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq) return IRQ_HANDLED; } -/** - * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt - * @dev: Device entry - * @irq: Device wake-up interrupt - * - * Unless your hardware has separate wake-up interrupts in addition - * to the device IO interrupts, you don't need this. - * - * Sets up a threaded interrupt handler for a device that has - * a dedicated wake-up interrupt in addition to the device IO - * interrupt. 
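The new devm_pm_set_wake_irq() above fits devres-based probe code: clearing the wake IRQ on driver detach no longer needs an explicit remove step. A sketch assuming a hypothetical platform driver whose first interrupt doubles as the wake IRQ:

#include <linux/platform_device.h>
#include <linux/pm_wakeirq.h>

static int foo_probe(struct platform_device *pdev)
{
        int irq = platform_get_irq(pdev, 0);

        if (irq < 0)
                return irq;

        /* Auto-cleared via devres when the driver is unbound. */
        return devm_pm_set_wake_irq(&pdev->dev, irq);
}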
- * - * The interrupt starts disabled, and needs to be managed for - * the device by the bus code or the device driver using - * dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq() - * functions. - */ -int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) +static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag) { struct wake_irq *wirq; int err; @@ -196,11 +202,11 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_name; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) goto err_free_irq; - wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED; + wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED | flag; return err; @@ -213,45 +219,45 @@ err_free: return err; } -EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); /** - * dev_pm_enable_wake_irq - Enable device wake-up interrupt - * @dev: Device + * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt + * @dev: Device entry + * @irq: Device wake-up interrupt * - * Optionally called from the bus code or the device driver for - * runtime_resume() to override the PM runtime core managed wake-up - * interrupt handling to enable the wake-up interrupt. + * Unless your hardware has separate wake-up interrupts in addition + * to the device IO interrupts, you don't need this. * - * Note that for runtime_suspend()) the wake-up interrupts - * should be unconditionally enabled unlike for suspend() - * that is conditional. + * Sets up a threaded interrupt handler for a device that has + * a dedicated wake-up interrupt in addition to the device IO + * interrupt. */ -void dev_pm_enable_wake_irq(struct device *dev) +int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) { - struct wake_irq *wirq = dev->power.wakeirq; - - if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) - enable_irq(wirq->irq); + return __dev_pm_set_dedicated_wake_irq(dev, irq, 0); } -EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq); +EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); /** - * dev_pm_disable_wake_irq - Disable device wake-up interrupt - * @dev: Device + * dev_pm_set_dedicated_wake_irq_reverse - Request a dedicated wake-up interrupt + * with reverse enable ordering + * @dev: Device entry + * @irq: Device wake-up interrupt * - * Optionally called from the bus code or the device driver for - * runtime_suspend() to override the PM runtime core managed wake-up - * interrupt handling to disable the wake-up interrupt. + * Unless your hardware has separate wake-up interrupts in addition + * to the device IO interrupts, you don't need this. + * + * Sets up a threaded interrupt handler for a device that has a dedicated + * wake-up interrupt in addition to the device IO interrupt. It sets + * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend() + * to enable dedicated wake-up interrupt after running the runtime suspend + * callback for @dev. 
*/ -void dev_pm_disable_wake_irq(struct device *dev) +int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq) { - struct wake_irq *wirq = dev->power.wakeirq; - - if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) - disable_irq_nosync(wirq->irq); + return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE); } -EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); +EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse); /** * dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt @@ -260,7 +266,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); * * Enables wakeirq conditionally. We need to enable wake-up interrupt * lazily on the first rpm_suspend(). This is needed as the consumer device - * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would + * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would * otherwise try to disable already disabled wakeirq. The wake-up interrupt * starts disabled with IRQ_NOAUTOEN set. * @@ -285,25 +291,58 @@ void dev_pm_enable_wake_irq_check(struct device *dev, return; enable: - enable_irq(wirq->irq); + if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) { + enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt * @dev: Device + * @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE * * Disables wake-up interrupt conditionally based on status. * Should be only called from rpm_suspend() and rpm_resume() path. */ -void dev_pm_disable_wake_irq_check(struct device *dev) +void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable) { struct wake_irq *wirq = dev->power.wakeirq; if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) return; - if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) + if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) + return; + + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) { + wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED; disable_irq_nosync(wirq->irq); + } +} + +/** + * dev_pm_enable_wake_irq_complete - enable wake IRQ not enabled before + * @dev: Device using the wake IRQ + * + * Enable wake IRQ conditionally based on status, mainly used if want to + * enable wake IRQ after running ->runtime_suspend() which depends on + * WAKE_IRQ_DEDICATED_REVERSE. + * + * Should be only called from rpm_suspend() path. 
+ */ +void dev_pm_enable_wake_irq_complete(struct device *dev) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) + return; + + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { + enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** @@ -320,7 +359,7 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq) if (device_may_wakeup(wirq->dev)) { if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && - !pm_runtime_status_suspended(wirq->dev)) + !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) enable_irq(wirq->irq); enable_irq_wake(wirq->irq); @@ -343,7 +382,7 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq) disable_irq_wake(wirq->irq); if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && - !pm_runtime_status_suspended(wirq->dev)) + !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) disable_irq_nosync(wirq->irq); } } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f0b37c188514..1e1a0e7eeac5 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -19,11 +19,6 @@ #include "power.h" -#ifndef CONFIG_SUSPEND -suspend_state_t pm_suspend_target_state; -#define pm_suspend_target_state (PM_SUSPEND_ON) -#endif - #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ srcu_read_lock_held(&wakeup_srcu)) @@ -34,7 +29,8 @@ suspend_state_t pm_suspend_target_state; bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ -unsigned int pm_wakeup_irq __read_mostly; +static unsigned int wakeup_irq[2] __read_mostly; +static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. */ static atomic_t pm_abort_suspend __read_mostly; @@ -81,7 +77,7 @@ static DEFINE_IDA(wakeup_ida); * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. */ -struct wakeup_source *wakeup_source_create(const char *name) +static struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; const char *ws_name; @@ -110,7 +106,6 @@ err_name: err_ws: return NULL; } -EXPORT_SYMBOL_GPL(wakeup_source_create); /* * Record wakeup_source statistics being deleted into a dummy wakeup_source. @@ -153,7 +148,7 @@ static void wakeup_source_free(struct wakeup_source *ws) * * Use only for wakeup source objects created with wakeup_source_create(). */ -void wakeup_source_destroy(struct wakeup_source *ws) +static void wakeup_source_destroy(struct wakeup_source *ws) { if (!ws) return; @@ -162,13 +157,12 @@ void wakeup_source_destroy(struct wakeup_source *ws) wakeup_source_record(ws); wakeup_source_free(ws); } -EXPORT_SYMBOL_GPL(wakeup_source_destroy); /** * wakeup_source_add - Add given object to the list of wakeup sources. * @ws: Wakeup source object to add to the list. */ -void wakeup_source_add(struct wakeup_source *ws) +static void wakeup_source_add(struct wakeup_source *ws) { unsigned long flags; @@ -183,32 +177,29 @@ void wakeup_source_add(struct wakeup_source *ws) list_add_rcu(&ws->entry, &wakeup_sources); raw_spin_unlock_irqrestore(&events_lock, flags); } -EXPORT_SYMBOL_GPL(wakeup_source_add); /** * wakeup_source_remove - Remove given object from the wakeup sources list. * @ws: Wakeup source object to remove from the list. 
*/ -void wakeup_source_remove(struct wakeup_source *ws) +static void wakeup_source_remove(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; + /* + * After shutting down the timer, wakeup_source_activate() will warn if + * the given wakeup source is passed to it. + */ + timer_shutdown_sync(&ws->timer); + raw_spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); - - del_timer_sync(&ws->timer); - /* - * Clear timer.function to make wakeup_source_not_registered() treat - * this wakeup source as not registered. - */ - ws->timer.function = NULL; } -EXPORT_SYMBOL_GPL(wakeup_source_remove); /** * wakeup_source_register - Create wakeup source and add it to the list. @@ -341,7 +332,7 @@ int device_wakeup_enable(struct device *dev) if (!dev || !dev->power.can_wakeup) return -EINVAL; - if (pm_suspend_target_state != PM_SUSPEND_ON) + if (pm_sleep_transition_in_progress()) dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); ws = wakeup_source_register(dev, dev_name(dev)); @@ -455,16 +446,15 @@ static struct wakeup_source *device_wakeup_detach(struct device *dev) * Detach the @dev's wakeup source object from it, unregister this wakeup source * object and destroy it. */ -int device_wakeup_disable(struct device *dev) +void device_wakeup_disable(struct device *dev) { struct wakeup_source *ws; if (!dev || !dev->power.can_wakeup) - return -EINVAL; + return; ws = device_wakeup_detach(dev); wakeup_source_unregister(ws); - return 0; } EXPORT_SYMBOL_GPL(device_wakeup_disable); @@ -500,55 +490,29 @@ void device_set_wakeup_capable(struct device *dev, bool capable) EXPORT_SYMBOL_GPL(device_set_wakeup_capable); /** - * device_init_wakeup - Device wakeup initialization. - * @dev: Device to handle. - * @enable: Whether or not to enable @dev as a wakeup device. - * - * By default, most devices should leave wakeup disabled. The exceptions are - * devices that everyone expects to be wakeup sources: keyboards, power buttons, - * possibly network interfaces, etc. Also, devices that don't generate their - * own wakeup requests but merely forward requests from one bus to another - * (like PCI bridges) should have wakeup enabled by default. - */ -int device_init_wakeup(struct device *dev, bool enable) -{ - int ret = 0; - - if (!dev) - return -EINVAL; - - if (enable) { - device_set_wakeup_capable(dev, true); - ret = device_wakeup_enable(dev); - } else { - device_wakeup_disable(dev); - device_set_wakeup_capable(dev, false); - } - - return ret; -} -EXPORT_SYMBOL_GPL(device_init_wakeup); - -/** * device_set_wakeup_enable - Enable or disable a device to wake up the system. * @dev: Device to handle. * @enable: enable/disable flag */ int device_set_wakeup_enable(struct device *dev, bool enable) { - return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev); + if (enable) + return device_wakeup_enable(dev); + + device_wakeup_disable(dev); + return 0; } EXPORT_SYMBOL_GPL(device_set_wakeup_enable); /** - * wakeup_source_not_registered - validate the given wakeup source. + * wakeup_source_not_usable - validate the given wakeup source. * @ws: Wakeup source to be validated. */ -static bool wakeup_source_not_registered(struct wakeup_source *ws) +static bool wakeup_source_not_usable(struct wakeup_source *ws) { /* - * Use timer struct to check if the given source is initialized - * by wakeup_source_add. 
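With wakeup_source_create/add/remove/destroy made static above, driver code is expected to go through the register/unregister pair and the __pm_stay_awake()/__pm_relax() or pm_wakeup_event() helpers. A hedged sketch; the "foo-events" name and foo_* functions are invented:

#include <linux/errno.h>
#include <linux/pm_wakeup.h>

static struct wakeup_source *foo_ws;

static int foo_wakeup_init(struct device *dev)
{
        foo_ws = wakeup_source_register(dev, "foo-events");
        return foo_ws ? 0 : -ENOMEM;
}

static void foo_handle_event(struct device *dev)
{
        __pm_stay_awake(foo_ws);
        /* ... process the event while the system is held awake ... */
        __pm_relax(foo_ws);

        /* One-shot alternative: report and hold for at most 100 ms. */
        pm_wakeup_event(dev, 100);
}

static void foo_wakeup_exit(void)
{
        wakeup_source_unregister(foo_ws);
}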
+ * Use the timer struct to check if the given wakeup source has been + * initialized by wakeup_source_add() and it is not going away. */ return ws->timer.function != pm_wakeup_timer_fn; } @@ -586,15 +550,14 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws) * @ws: Wakeup source to handle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM - * core of the event by incrementing the counter of of wakeup events being + * core of the event by incrementing the counter of the wakeup events being * processed. */ static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; - if (WARN_ONCE(wakeup_source_not_registered(ws), - "unregistered wakeup source\n")) + if (WARN_ONCE(wakeup_source_not_usable(ws), "unusable wakeup source\n")) return; ws->active = true; @@ -644,7 +607,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, false); - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; spin_unlock_irqrestore(&ws->lock, flags); @@ -724,7 +687,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) ws->max_time = duration; ws->last_time = now; - del_timer(&ws->timer); + timer_delete(&ws->timer); ws->timer_expires = 0; if (ws->autosleep_enabled) @@ -732,7 +695,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) /* * Increment the counter of registered wakeup events and decrement the - * couter of wakeup events in progress simultaneously. + * counter of wakeup events in progress simultaneously. */ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); trace_wakeup_source_deactivate(ws->name, cec); @@ -794,7 +757,7 @@ EXPORT_SYMBOL_GPL(pm_relax); */ static void pm_wakeup_timer_fn(struct timer_list *t) { - struct wakeup_source *ws = from_timer(ws, t, timer); + struct wakeup_source *ws = timer_container_of(ws, t, timer); unsigned long flags; spin_lock_irqsave(&ws->lock, flags); @@ -929,6 +892,7 @@ bool pm_wakeup_pending(void) return ret || atomic_read(&pm_abort_suspend) > 0; } +EXPORT_SYMBOL_GPL(pm_wakeup_pending); void pm_system_wakeup(void) { @@ -942,19 +906,47 @@ void pm_system_cancel_wakeup(void) atomic_dec_if_positive(&pm_abort_suspend); } -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(unsigned int irq_number) { - pm_wakeup_irq = 0; - if (reset) + raw_spin_lock_irq(&wakeup_irq_lock); + + if (irq_number && wakeup_irq[0] == irq_number) + wakeup_irq[0] = wakeup_irq[1]; + else + wakeup_irq[0] = 0; + + wakeup_irq[1] = 0; + + raw_spin_unlock_irq(&wakeup_irq_lock); + + if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { - if (pm_wakeup_irq == 0) { - pm_wakeup_irq = irq_number; + unsigned long flags; + + raw_spin_lock_irqsave(&wakeup_irq_lock, flags); + + if (wakeup_irq[0] == 0) + wakeup_irq[0] = irq_number; + else if (wakeup_irq[1] == 0) + wakeup_irq[1] = irq_number; + else + irq_number = 0; + + pm_pr_dbg("Triggering wakeup from IRQ %d\n", irq_number); + + raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); + + if (irq_number) pm_system_wakeup(); - } +} + +unsigned int pm_wakeup_irq(void) +{ + return wakeup_irq[0]; } /** @@ -1180,7 +1172,7 @@ static const struct file_operations wakeup_sources_stats_fops = { static int __init wakeup_sources_debugfs_init(void) { - debugfs_create_file("wakeup_sources", S_IRUGO, NULL, NULL, + debugfs_create_file("wakeup_sources", 0444, NULL, NULL, &wakeup_sources_stats_fops); return 0; } diff --git 
a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index 924fac493c4f..3ffd427248e8 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -34,6 +34,7 @@ wakeup_attr(active_count); wakeup_attr(event_count); wakeup_attr(wakeup_count); wakeup_attr(expire_count); +wakeup_attr(relax_count); static ssize_t active_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -119,6 +120,7 @@ static struct attribute *wakeup_source_attrs[] = { &dev_attr_event_count.attr, &dev_attr_wakeup_count.attr, &dev_attr_expire_count.attr, + &dev_attr_relax_count.attr, &dev_attr_active_time_ms.attr, &dev_attr_total_time_ms.attr, &dev_attr_max_time_ms.attr, @@ -210,7 +212,7 @@ void wakeup_source_sysfs_remove(struct wakeup_source *ws) static int __init wakeup_sources_sysfs_init(void) { - wakeup_class = class_create(THIS_MODULE, "wakeup"); + wakeup_class = class_create("wakeup"); return PTR_ERR_OR_ZERO(wakeup_class); } diff --git a/drivers/base/property.c b/drivers/base/property.c index d0874f6c29bb..6a63860579dd 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -7,23 +7,30 @@ * Mika Westerberg <mika.westerberg@linux.intel.com> */ -#include <linux/acpi.h> +#include <linux/device.h> +#include <linux/err.h> #include <linux/export.h> -#include <linux/kernel.h> +#include <linux/kconfig.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_graph.h> -#include <linux/of_irq.h> #include <linux/property.h> -#include <linux/etherdevice.h> #include <linux/phy.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/types.h> -struct fwnode_handle *dev_fwnode(struct device *dev) +struct fwnode_handle *__dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } -EXPORT_SYMBOL_GPL(dev_fwnode); +EXPORT_SYMBOL_GPL(__dev_fwnode); + +const struct fwnode_handle *__dev_fwnode_const(const struct device *dev) +{ + return IS_ENABLED(CONFIG_OF) && dev->of_node ? + of_fwnode_handle(dev->of_node) : dev->fwnode; +} +EXPORT_SYMBOL_GPL(__dev_fwnode_const); /** * device_property_present - check if a property of a device is present @@ -31,8 +38,10 @@ EXPORT_SYMBOL_GPL(dev_fwnode); * @propname: Name of the property * * Check if property @propname is present in the device firmware description. + * + * Return: true if property @propname is present. Otherwise, returns false. */ -bool device_property_present(struct device *dev, const char *propname) +bool device_property_present(const struct device *dev, const char *propname) { return fwnode_property_present(dev_fwnode(dev), propname); } @@ -42,22 +51,64 @@ EXPORT_SYMBOL_GPL(device_property_present); * fwnode_property_present - check if a property of a firmware node is present * @fwnode: Firmware node whose property to check * @propname: Name of the property + * + * Return: true if property @propname is present. Otherwise, returns false. 
*/ bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { bool ret; + if (IS_ERR_OR_NULL(fwnode)) + return false; + ret = fwnode_call_bool_op(fwnode, property_present, propname); - if (ret == false && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_bool_op(fwnode->secondary, property_present, - propname); - return ret; + if (ret) + return ret; + + return fwnode_call_bool_op(fwnode->secondary, property_present, propname); } EXPORT_SYMBOL_GPL(fwnode_property_present); /** + * device_property_read_bool - Return the value for a boolean property of a device + * @dev: Device whose property is being checked + * @propname: Name of the property + * + * Return if property @propname is true or false in the device firmware description. + * + * Return: true if property @propname is present. Otherwise, returns false. + */ +bool device_property_read_bool(const struct device *dev, const char *propname) +{ + return fwnode_property_read_bool(dev_fwnode(dev), propname); +} +EXPORT_SYMBOL_GPL(device_property_read_bool); + +/** + * fwnode_property_read_bool - Return the value for a boolean property of a firmware node + * @fwnode: Firmware node whose property to check + * @propname: Name of the property + * + * Return if property @propname is true or false in the firmware description. + */ +bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, + const char *propname) +{ + bool ret; + + if (IS_ERR_OR_NULL(fwnode)) + return false; + + ret = fwnode_call_bool_op(fwnode, property_read_bool, propname); + if (ret) + return ret; + + return fwnode_call_bool_op(fwnode->secondary, property_read_bool, propname); +} +EXPORT_SYMBOL_GPL(fwnode_property_read_bool); + +/** * device_property_read_u8_array - return a u8 array property of a device * @dev: Device to get the property of * @propname: Name of the property @@ -67,6 +118,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_present); * Function reads an array of u8 properties with @propname from the device * firmware description and stores them to @val if found. * + * It's recommended to call device_property_count_u8() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -75,7 +129,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_present); * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ -int device_property_read_u8_array(struct device *dev, const char *propname, +int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_u8_array(dev_fwnode(dev), propname, val, nval); @@ -92,6 +146,9 @@ EXPORT_SYMBOL_GPL(device_property_read_u8_array); * Function reads an array of u16 properties with @propname from the device * firmware description and stores them to @val if found. * + * It's recommended to call device_property_count_u16() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -100,7 +157,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u8_array); * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. 
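The distinction drawn above between presence checks and boolean reads maps onto typical firmware parsing in probe. A sketch with an invented foo_cfg structure; "wakeup-source" and "local-mac-address" are standard property names used only for illustration:

#include <linux/device.h>
#include <linux/property.h>

struct foo_cfg {
        u8 mac[6];
        bool wakeup;
};

static void foo_parse(struct device *dev, struct foo_cfg *cfg)
{
        /* Flag-style boolean: presence of the property means "true". */
        cfg->wakeup = device_property_read_bool(dev, "wakeup-source");

        /* Fixed-size array read; 0 on success, negative errno otherwise. */
        if (device_property_read_u8_array(dev, "local-mac-address", cfg->mac, 6))
                dev_info(dev, "no MAC address in firmware\n");
}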
*/ -int device_property_read_u16_array(struct device *dev, const char *propname, +int device_property_read_u16_array(const struct device *dev, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_u16_array(dev_fwnode(dev), propname, val, nval); @@ -117,6 +174,9 @@ EXPORT_SYMBOL_GPL(device_property_read_u16_array); * Function reads an array of u32 properties with @propname from the device * firmware description and stores them to @val if found. * + * It's recommended to call device_property_count_u32() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -125,7 +185,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u16_array); * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ -int device_property_read_u32_array(struct device *dev, const char *propname, +int device_property_read_u32_array(const struct device *dev, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_u32_array(dev_fwnode(dev), propname, val, nval); @@ -142,6 +202,9 @@ EXPORT_SYMBOL_GPL(device_property_read_u32_array); * Function reads an array of u64 properties with @propname from the device * firmware description and stores them to @val if found. * + * It's recommended to call device_property_count_u64() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -150,7 +213,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u32_array); * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ -int device_property_read_u64_array(struct device *dev, const char *propname, +int device_property_read_u64_array(const struct device *dev, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_u64_array(dev_fwnode(dev), propname, val, nval); @@ -167,6 +230,9 @@ EXPORT_SYMBOL_GPL(device_property_read_u64_array); * Function reads an array of string properties with @propname from the device * firmware description and stores them to @val if found. * + * It's recommended to call device_property_string_array_count() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, @@ -175,7 +241,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u64_array); * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ -int device_property_read_string_array(struct device *dev, const char *propname, +int device_property_read_string_array(const struct device *dev, const char *propname, const char **val, size_t nval) { return fwnode_property_read_string_array(dev_fwnode(dev), propname, val, nval); @@ -197,7 +263,7 @@ EXPORT_SYMBOL_GPL(device_property_read_string_array); * %-EPROTO or %-EILSEQ if the property type is not a string. * %-ENXIO if no suitable firmware interface is present. 
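The kernel-doc above now recommends the _count helpers over calling the readers with @val equal to %NULL and @nval equal to 0; a sketch following that recommendation (the "vendor,clock-rates" name and dynamic allocation are assumptions):

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/property.h>
#include <linux/slab.h>

/* Hypothetical: size the u32 array first, then read it in one call. */
static int foo_read_clock_rates(const struct device *dev)
{
	u32 *rates;
	int n, ret;

	n = device_property_count_u32(dev, "vendor,clock-rates");
	if (n <= 0)
		return n ? n : -ENODATA;

	rates = kcalloc(n, sizeof(*rates), GFP_KERNEL);
	if (!rates)
		return -ENOMEM;

	ret = device_property_read_u32_array(dev, "vendor,clock-rates",
					     rates, n);
	if (ret)
		goto out_free;

	/* ... use rates[0..n-1] ... */

out_free:
	kfree(rates);
	return ret;
}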
*/ -int device_property_read_string(struct device *dev, const char *propname, +int device_property_read_string(const struct device *dev, const char *propname, const char **val) { return fwnode_property_read_string(dev_fwnode(dev), propname, val); @@ -213,13 +279,13 @@ EXPORT_SYMBOL_GPL(device_property_read_string); * Find a given string in a string array and if it is found return the * index back. * - * Return: %0 if the property was found (success), + * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. */ -int device_property_match_string(struct device *dev, const char *propname, +int device_property_match_string(const struct device *dev, const char *propname, const char *string) { return fwnode_property_match_string(dev_fwnode(dev), propname, string); @@ -233,15 +299,16 @@ static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, { int ret; + if (IS_ERR_OR_NULL(fwnode)) + return -EINVAL; + ret = fwnode_call_int_op(fwnode, property_read_int_array, propname, elem_size, val, nval); - if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_int_op( - fwnode->secondary, property_read_int_array, propname, - elem_size, val, nval); + if (ret != -EINVAL) + return ret; - return ret; + return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname, + elem_size, val, nval); } /** @@ -254,6 +321,9 @@ static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, * Read an array of u8 properties with @propname from @fwnode and stores them to * @val if found. * + * It's recommended to call fwnode_property_count_u8() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -280,6 +350,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array); * Read an array of u16 properties with @propname from @fwnode and store them to * @val if found. * + * It's recommended to call fwnode_property_count_u16() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -306,6 +379,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array); * Read an array of u32 properties with @propname from @fwnode store them to * @val if found. * + * It's recommended to call fwnode_property_count_u32() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -332,6 +408,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array); * Read an array of u64 properties with @propname from @fwnode and store them to * @val if found. * + * It's recommended to call fwnode_property_count_u64() instead of calling + * this function with @val equals %NULL and @nval equals 0. 
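A sketch illustrating the corrected return convention of device_property_match_string(), which reports a 0-based index on success; the "vendor,mode" and "vendor,supported-modes" properties and the "fast" value are assumptions:

#include <linux/device.h>
#include <linux/property.h>

/* Hypothetical: map a firmware string property onto a driver-side index. */
static int foo_get_mode(const struct device *dev)
{
	const char *mode;
	int idx;

	/* Plain string read: 0 on success, negative errno otherwise. */
	if (!device_property_read_string(dev, "vendor,mode", &mode))
		dev_dbg(dev, "requested mode: %s\n", mode);

	/* Index (>= 0) of "fast" within the string array, or negative errno. */
	idx = device_property_match_string(dev, "vendor,supported-modes",
					   "fast");
	if (idx < 0)
		return idx;

	return idx;	/* 0-based position, per the updated kernel-doc */
}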
+ * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, @@ -358,6 +437,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array); * Read an string list property @propname from the given firmware node and store * them to @val if found. * + * It's recommended to call fwnode_property_string_array_count() instead of calling + * this function with @val equals %NULL and @nval equals 0. + * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, @@ -372,14 +454,16 @@ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, { int ret; + if (IS_ERR_OR_NULL(fwnode)) + return -EINVAL; + ret = fwnode_call_int_op(fwnode, property_read_string_array, propname, val, nval); - if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_int_op(fwnode->secondary, - property_read_string_array, propname, - val, nval); - return ret; + if (ret != -EINVAL) + return ret; + + return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname, + val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); @@ -416,7 +500,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_string); * Find a given string in a string array and if it is found return the * index back. * - * Return: %0 if the property was found (success), + * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, @@ -428,7 +512,7 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char **values; int nval, ret; - nval = fwnode_property_read_string_array(fwnode, propname, NULL, 0); + nval = fwnode_property_string_array_count(fwnode, propname); if (nval < 0) return nval; @@ -441,35 +525,72 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, ret = fwnode_property_read_string_array(fwnode, propname, values, nval); if (ret < 0) - goto out; + goto out_free; ret = match_string(values, nval, string); if (ret < 0) ret = -ENODATA; -out: + +out_free: kfree(values); return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_string); /** + * fwnode_property_match_property_string - find a property string value in an array and return index + * @fwnode: Firmware node to get the property of + * @propname: Name of the property holding the string value + * @array: String array to search in + * @n: Size of the @array + * + * Find a property string value in a given @array and if it is found return + * the index back. + * + * Return: index, starting from %0, if the string value was found in the @array (success), + * %-ENOENT when the string value was not found in the @array, + * %-EINVAL if given arguments are not valid, + * %-ENODATA if the property does not have a value, + * %-EPROTO or %-EILSEQ if the property is not a string, + * %-ENXIO if no suitable firmware interface is present. 
+ */ +int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, + const char *propname, const char * const *array, size_t n) +{ + const char *string; + int ret; + + ret = fwnode_property_read_string(fwnode, propname, &string); + if (ret) + return ret; + + ret = match_string(array, n, string); + if (ret < 0) + ret = -ENOENT; + + return ret; +} +EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); + +/** * fwnode_property_get_reference_args() - Find a reference with arguments * @fwnode: Firmware node where to look for the reference * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. NULL if @nargs is known, - * otherwise @nargs is ignored. Only relevant on OF. + * otherwise @nargs is ignored. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. + * May be NULL. * * Obtain a reference based on a named property in an fwnode, with * integer arguments. * - * Caller is responsible to call fwnode_handle_put() on the returned - * args->fwnode pointer. + * The caller is responsible for calling fwnode_handle_put() on the returned + * @args->fwnode pointer. * - * Returns: %0 on success + * Return: %0 on success * %-ENOENT when the index is out of bounds, the index has an empty * reference or the property was not found * %-EINVAL on parse error @@ -479,7 +600,20 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { - return fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, + int ret; + + if (IS_ERR_OR_NULL(fwnode)) + return -ENOENT; + + ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, + nargs, index, args); + if (ret == 0) + return ret; + + if (IS_ERR_OR_NULL(fwnode->secondary)) + return ret; + + return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop, nargs, index, args); } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); @@ -492,8 +626,11 @@ EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); * * @index can be used when the named reference holds a table of references. * - * Returns pointer to the reference fwnode, or ERR_PTR. Caller is responsible to - * call fwnode_handle_put() on the returned fwnode pointer. + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * + * Return: a pointer to the reference fwnode, when found. Otherwise, + * returns an error pointer. */ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *name, @@ -509,58 +646,10 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, EXPORT_SYMBOL_GPL(fwnode_find_reference); /** - * device_remove_properties - Remove properties from a device object. - * @dev: Device whose properties to remove. - * - * The function removes properties previously associated to the device - * firmware node with device_add_properties(). Memory allocated to the - * properties will also be released. 
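A sketch of a reference lookup with arguments, matching the primary-then-secondary fallback added to fwnode_property_get_reference_args() above; the "vendor,dmas" reference name and its one-cell layout are assumptions for illustration:

#include <linux/printk.h>
#include <linux/property.h>

/* Hypothetical: resolve the first "vendor,dmas" reference plus one argument. */
static int foo_get_dma_channel(const struct fwnode_handle *fwnode)
{
	struct fwnode_reference_args args;
	int ret;

	ret = fwnode_property_get_reference_args(fwnode, "vendor,dmas", NULL,
						 1, 0, &args);
	if (ret)
		return ret;	/* e.g. -ENOENT if index 0 is absent or empty */

	/* args.fwnode is the referenced controller node. */
	pr_debug("dma channel %llu\n", args.args[0]);

	/* The caller owns the reference to args.fwnode. */
	fwnode_handle_put(args.fwnode);

	return 0;
}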
- */ -void device_remove_properties(struct device *dev) -{ - struct fwnode_handle *fwnode = dev_fwnode(dev); - - if (!fwnode) - return; - - if (is_software_node(fwnode->secondary)) { - fwnode_remove_software_node(fwnode->secondary); - set_secondary_fwnode(dev, NULL); - } -} -EXPORT_SYMBOL_GPL(device_remove_properties); - -/** - * device_add_properties - Add a collection of properties to a device object. - * @dev: Device to add properties to. - * @properties: Collection of properties to add. - * - * Associate a collection of device properties represented by @properties with - * @dev. The function takes a copy of @properties. - * - * WARNING: The callers should not use this function if it is known that there - * is no real firmware node associated with @dev! In that case the callers - * should create a software node and assign it to @dev directly. - */ -int device_add_properties(struct device *dev, - const struct property_entry *properties) -{ - struct fwnode_handle *fwnode; - - fwnode = fwnode_create_software_node(properties, NULL); - if (IS_ERR(fwnode)) - return PTR_ERR(fwnode); - - set_secondary_fwnode(dev, fwnode); - return 0; -} -EXPORT_SYMBOL_GPL(device_add_properties); - -/** * fwnode_get_name - Return the name of a node * @fwnode: The firmware node * - * Returns a pointer to the node name. + * Return: a pointer to the node name, or %NULL. */ const char *fwnode_get_name(const struct fwnode_handle *fwnode) { @@ -572,7 +661,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_name); * fwnode_get_name_prefix - Return the prefix of node for printing purposes * @fwnode: The firmware node * - * Returns the prefix of a node, intended to be printed right before the node. + * Return: the prefix of a node, intended to be printed right before the node. * The prefix works also as a separator between the nodes. */ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode) @@ -581,10 +670,41 @@ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode) } /** + * fwnode_name_eq - Return true if node name is equal + * @fwnode: The firmware node + * @name: The name to which to compare the node name + * + * Compare the name provided as an argument to the name of the node, stopping + * the comparison at either NUL or '@' character, whichever comes first. This + * function is generally used for comparing node names while ignoring the + * possible unit address of the node. + * + * Return: true if the node name matches with the name provided in the @name + * argument, false otherwise. + */ +bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name) +{ + const char *node_name; + ptrdiff_t len; + + node_name = fwnode_get_name(fwnode); + if (!node_name) + return false; + + len = strchrnul(node_name, '@') - node_name; + + return str_has_prefix(node_name, name) == len; +} +EXPORT_SYMBOL_GPL(fwnode_name_eq); + +/** * fwnode_get_parent - Return parent firwmare node * @fwnode: Firmware whose parent is retrieved * - * Return parent firmware node of the given node if possible or %NULL if no + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * + * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. */ struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode) @@ -601,8 +721,12 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent); * on the passed node, making it suitable for iterating through a * node's parents. 
* - * Returns a node pointer with refcount incremented, use - * fwnode_handle_node() on it when done. + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. Note that this function also puts a reference to @fwnode + * unconditionally. + * + * Return: parent firmware node of the given node if possible or %NULL if no + * parent was available. */ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) { @@ -615,46 +739,18 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(fwnode_get_next_parent); /** - * fwnode_get_next_parent_dev - Find device of closest ancestor fwnode - * @fwnode: firmware node - * - * Given a firmware node (@fwnode), this function finds its closest ancestor - * firmware node that has a corresponding struct device and returns that struct - * device. - * - * The caller of this function is expected to call put_device() on the returned - * device when they are done. - */ -struct device *fwnode_get_next_parent_dev(struct fwnode_handle *fwnode) -{ - struct device *dev; - - fwnode_handle_get(fwnode); - do { - fwnode = fwnode_get_next_parent(fwnode); - if (!fwnode) - return NULL; - dev = get_dev_from_fwnode(fwnode); - } while (!dev); - fwnode_handle_put(fwnode); - return dev; -} - -/** * fwnode_count_parents - Return the number of parents a node has * @fwnode: The node the parents of which are to be counted * - * Returns the number of parents a node has. + * Return: the number of parents a node has. */ unsigned int fwnode_count_parents(const struct fwnode_handle *fwnode) { - struct fwnode_handle *__fwnode; - unsigned int count; - - __fwnode = fwnode_get_parent(fwnode); + struct fwnode_handle *parent; + unsigned int count = 0; - for (count = 0; __fwnode; count++) - __fwnode = fwnode_get_next_parent(__fwnode); + fwnode_for_each_parent_node(fwnode, parent) + count++; return count; } @@ -665,58 +761,37 @@ EXPORT_SYMBOL_GPL(fwnode_count_parents); * @fwnode: The node the parent of which is requested * @depth: Distance of the parent from the node * - * Returns the nth parent of a node. If there is no parent at the requested + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * + * Return: the nth parent of a node. If there is no parent at the requested * @depth, %NULL is returned. If @depth is 0, the functionality is equivalent to * fwnode_handle_get(). For @depth == 1, it is fwnode_get_parent() and so on. - * - * The caller is responsible for calling fwnode_handle_put() for the returned - * node. */ struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode, unsigned int depth) { - unsigned int i; - - fwnode_handle_get(fwnode); - - for (i = 0; i < depth && fwnode; i++) - fwnode = fwnode_get_next_parent(fwnode); - - return fwnode; -} -EXPORT_SYMBOL_GPL(fwnode_get_nth_parent); + struct fwnode_handle *parent; -/** - * fwnode_is_ancestor_of - Test if @test_ancestor is ancestor of @test_child - * @test_ancestor: Firmware which is tested for being an ancestor - * @test_child: Firmware which is tested for being the child - * - * A node is considered an ancestor of itself too. - * - * Returns true if @test_ancestor is an ancestor of @test_child. - * Otherwise, returns false. 
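Both parent-walking helpers above are now expressed via fwnode_for_each_parent_node(); a sketch of how a caller might combine them (purely illustrative, not from this patch):

#include <linux/property.h>

/* Hypothetical: fetch the topmost ancestor of a node. */
static struct fwnode_handle *foo_get_root(struct fwnode_handle *fwnode)
{
	unsigned int depth = fwnode_count_parents(fwnode);

	/*
	 * depth == 0 simply takes a reference on @fwnode itself; either way
	 * the caller must fwnode_handle_put() the returned node.
	 */
	return fwnode_get_nth_parent(fwnode, depth);
}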
- */ -bool fwnode_is_ancestor_of(struct fwnode_handle *test_ancestor, - struct fwnode_handle *test_child) -{ - if (!test_ancestor) - return false; + if (depth == 0) + return fwnode_handle_get(fwnode); - fwnode_handle_get(test_child); - while (test_child) { - if (test_child == test_ancestor) { - fwnode_handle_put(test_child); - return true; - } - test_child = fwnode_get_next_parent(test_child); + fwnode_for_each_parent_node(fwnode, parent) { + if (--depth == 0) + return parent; } - return false; + return NULL; } +EXPORT_SYMBOL_GPL(fwnode_get_nth_parent); /** * fwnode_get_next_child_node - Return the next child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. Note that this function also puts a reference to @child + * unconditionally. */ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, @@ -727,10 +802,13 @@ fwnode_get_next_child_node(const struct fwnode_handle *fwnode, EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); /** - * fwnode_get_next_available_child_node - Return the next - * available child node handle for a node + * fwnode_get_next_available_child_node - Return the next available child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. Note that this function also puts a reference to @child + * unconditionally. */ struct fwnode_handle * fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, @@ -738,7 +816,7 @@ fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, { struct fwnode_handle *next_child = child; - if (!fwnode) + if (IS_ERR_OR_NULL(fwnode)) return NULL; do { @@ -754,24 +832,28 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); /** * device_get_next_child_node - Return the next child node handle for a device * @dev: Device to find the next child node for. - * @child: Handle to one of the device's child nodes or a null handle. + * @child: Handle to one of the device's child nodes or a %NULL handle. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. Note that this function also puts a reference to @child + * unconditionally. */ -struct fwnode_handle *device_get_next_child_node(struct device *dev, +struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { const struct fwnode_handle *fwnode = dev_fwnode(dev); struct fwnode_handle *next; + if (IS_ERR_OR_NULL(fwnode)) + return NULL; + /* Try to find a child in primary fwnode */ next = fwnode_get_next_child_node(fwnode, child); if (next) return next; /* When no more children in primary, continue with secondary */ - if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) - next = fwnode_get_next_child_node(fwnode->secondary, child); - - return next; + return fwnode_get_next_child_node(fwnode->secondary, child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); @@ -779,6 +861,9 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node); * fwnode_get_named_child_node - Return first matching named child node handle * @fwnode: Firmware node to find the named child node for. * @childname: String to match child node name against. 
+ * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. */ struct fwnode_handle * fwnode_get_named_child_node(const struct fwnode_handle *fwnode, @@ -792,8 +877,11 @@ EXPORT_SYMBOL_GPL(fwnode_get_named_child_node); * device_get_named_child_node - Return first matching named child node handle * @dev: Device to find the named child node for. * @childname: String to match child node name against. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. */ -struct fwnode_handle *device_get_named_child_node(struct device *dev, +struct fwnode_handle *device_get_named_child_node(const struct device *dev, const char *childname) { return fwnode_get_named_child_node(dev_fwnode(dev), childname); @@ -804,7 +892,10 @@ EXPORT_SYMBOL_GPL(device_get_named_child_node); * fwnode_handle_get - Obtain a reference to a device node * @fwnode: Pointer to the device node to obtain the reference to. * - * Returns the fwnode handle. + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * + * Return: the fwnode handle. */ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode) { @@ -816,28 +907,19 @@ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(fwnode_handle_get); /** - * fwnode_handle_put - Drop reference to a device node - * @fwnode: Pointer to the device node to drop the reference to. - * - * This has to be used when terminating device_for_each_child_node() iteration - * with break or return to prevent stale device node references from being left - * behind. - */ -void fwnode_handle_put(struct fwnode_handle *fwnode) -{ - fwnode_call_void_op(fwnode, put); -} -EXPORT_SYMBOL_GPL(fwnode_handle_put); - -/** * fwnode_device_is_available - check if a device is available for use * @fwnode: Pointer to the fwnode of the device. * + * Return: true if device is available for use. Otherwise, returns false. + * * For fwnode node types that don't implement the .device_is_available() * operation, this function returns true. */ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) { + if (IS_ERR_OR_NULL(fwnode)) + return false; + if (!fwnode_has_op(fwnode, device_is_available)) return true; @@ -846,50 +928,62 @@ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(fwnode_device_is_available); /** - * device_get_child_node_count - return the number of child nodes for device - * @dev: Device to cound the child nodes for + * fwnode_get_child_node_count - return the number of child nodes for a given firmware node + * @fwnode: Pointer to the parent firmware node + * + * Return: the number of child nodes for a given firmware node. */ -unsigned int device_get_child_node_count(struct device *dev) +unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode) { struct fwnode_handle *child; unsigned int count = 0; - device_for_each_child_node(dev, child) + fwnode_for_each_child_node(fwnode, child) count++; return count; } -EXPORT_SYMBOL_GPL(device_get_child_node_count); +EXPORT_SYMBOL_GPL(fwnode_get_child_node_count); -bool device_dma_supported(struct device *dev) +/** + * fwnode_get_named_child_node_count - number of child nodes with given name + * @fwnode: Node which child nodes are counted. + * @name: String to match child node name against. + * + * Scan child nodes and count all the nodes with a specific name. 
Potential + * 'number' -ending after the 'at sign' for scanned names is ignored. + * E.g.:: + * fwnode_get_named_child_node_count(fwnode, "channel"); + * would match all the nodes:: + * channel { }, channel@0 {}, channel@0xabba {}... + * + * Return: the number of child nodes with a matching name for a given device. + */ +unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode, + const char *name) { - const struct fwnode_handle *fwnode = dev_fwnode(dev); + struct fwnode_handle *child; + unsigned int count = 0; - /* For DT, this is always supported. - * For ACPI, this depends on CCA, which - * is determined by the acpi_dma_supported(). - */ - if (is_of_node(fwnode)) - return true; + fwnode_for_each_named_child_node(fwnode, child, name) + count++; - return acpi_dma_supported(to_acpi_device_node(fwnode)); + return count; } -EXPORT_SYMBOL_GPL(device_dma_supported); +EXPORT_SYMBOL_GPL(fwnode_get_named_child_node_count); -enum dev_dma_attr device_get_dma_attr(struct device *dev) +bool device_dma_supported(const struct device *dev) { - const struct fwnode_handle *fwnode = dev_fwnode(dev); - enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED; + return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported); +} +EXPORT_SYMBOL_GPL(device_dma_supported); - if (is_of_node(fwnode)) { - if (of_dma_is_coherent(to_of_node(fwnode))) - attr = DEV_DMA_COHERENT; - else - attr = DEV_DMA_NON_COHERENT; - } else - attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); +enum dev_dma_attr device_get_dma_attr(const struct device *dev) +{ + if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr)) + return DEV_DMA_NOT_SUPPORTED; - return attr; + return fwnode_call_int_op(dev_fwnode(dev), device_get_dma_attr); } EXPORT_SYMBOL_GPL(device_get_dma_attr); @@ -901,7 +995,7 @@ EXPORT_SYMBOL_GPL(device_get_dma_attr); * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ -int fwnode_get_phy_mode(struct fwnode_handle *fwnode) +int fwnode_get_phy_mode(const struct fwnode_handle *fwnode) { const char *pm; int err, i; @@ -935,105 +1029,109 @@ int device_get_phy_mode(struct device *dev) } EXPORT_SYMBOL_GPL(device_get_phy_mode); -static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode, - const char *name, char *addr, - int alen) -{ - int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen); - - if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr)) - return addr; - return NULL; -} - /** - * fwnode_get_mac_address - Get the MAC from the firmware node + * fwnode_iomap - Maps the memory mapped IO for a given fwnode * @fwnode: Pointer to the firmware node - * @addr: Address of buffer to store the MAC in - * @alen: Length of the buffer pointed to by addr, should be ETH_ALEN - * - * Search the firmware node for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the firmware tables, but were not updated by the firmware. 
For - * example, the DTS could define 'mac-address' and 'local-mac-address', with - * zero MAC addresses. Some older U-Boots only initialized 'local-mac-address'. - * In this case, the real MAC is in 'local-mac-address', and 'mac-address' - * exists but is all zeros. -*/ -void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen) -{ - char *res; - - res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen); - if (res) - return res; - - res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen); - if (res) - return res; - - return fwnode_get_mac_addr(fwnode, "address", addr, alen); -} -EXPORT_SYMBOL(fwnode_get_mac_address); - -/** - * device_get_mac_address - Get the MAC for a given device - * @dev: Pointer to the device - * @addr: Address of buffer to store the MAC in - * @alen: Length of the buffer pointed to by addr, should be ETH_ALEN + * @index: Index of the IO range + * + * Return: a pointer to the mapped memory. */ -void *device_get_mac_address(struct device *dev, char *addr, int alen) +void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index) { - return fwnode_get_mac_address(dev_fwnode(dev), addr, alen); + return fwnode_call_ptr_op(fwnode, iomap, index); } -EXPORT_SYMBOL(device_get_mac_address); +EXPORT_SYMBOL(fwnode_iomap); /** * fwnode_irq_get - Get IRQ directly from a fwnode * @fwnode: Pointer to the firmware node * @index: Zero-based index of the IRQ * - * Returns Linux IRQ number on success. Other values are determined - * accordingly to acpi_/of_ irq_get() operation. + * Return: Linux IRQ number on success. Negative errno on failure. */ int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { - struct resource res; int ret; - if (is_of_node(fwnode)) - return of_irq_get(to_of_node(fwnode), index); + ret = fwnode_call_int_op(fwnode, irq_get, index); + /* We treat mapping errors as invalid case */ + if (ret == 0) + return -EINVAL; - ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), index, &res); - if (ret) - return ret; - - return res.start; + return ret; } EXPORT_SYMBOL(fwnode_irq_get); /** + * fwnode_irq_get_byname - Get IRQ from a fwnode using its name + * @fwnode: Pointer to the firmware node + * @name: IRQ name + * + * Description: + * Find a match to the string @name in the 'interrupt-names' string array + * in _DSD for ACPI, or of_node for Device Tree. Then get the Linux IRQ + * number of the IRQ resource corresponding to the index of the matched + * string. + * + * Return: Linux IRQ number on success, or negative errno otherwise. + */ +int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name) +{ + int index; + + if (!name) + return -EINVAL; + + index = fwnode_property_match_string(fwnode, "interrupt-names", name); + if (index < 0) + return index; + + return fwnode_irq_get(fwnode, index); +} +EXPORT_SYMBOL(fwnode_irq_get_byname); + +/** * fwnode_graph_get_next_endpoint - Get next endpoint firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first * - * Returns an endpoint firmware node pointer or %NULL if no more endpoints + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. Note that this function also puts a reference to @prev + * unconditionally. + * + * Return: an endpoint firmware node pointer or %NULL if no more endpoints * are available. 
*/ struct fwnode_handle * fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { - return fwnode_call_ptr_op(fwnode, graph_get_next_endpoint, prev); + struct fwnode_handle *ep, *port_parent = NULL; + const struct fwnode_handle *parent; + + /* + * If this function is in a loop and the previous iteration returned + * an endpoint from fwnode->secondary, then we need to use the secondary + * as parent rather than @fwnode. + */ + if (prev) { + port_parent = fwnode_graph_get_port_parent(prev); + parent = port_parent; + } else { + parent = fwnode; + } + if (IS_ERR_OR_NULL(parent)) + return NULL; + + ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev); + if (ep) + goto out_put_port_parent; + + ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); + +out_put_port_parent: + fwnode_handle_put(port_parent); + return ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); @@ -1041,6 +1139,9 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint * @endpoint: Endpoint firmware node of the port * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * * Return: the firmware node of the device the @endpoint belongs to. */ struct fwnode_handle * @@ -1062,6 +1163,9 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent); * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote device the @fwnode points to. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port_parent(const struct fwnode_handle *fwnode) @@ -1082,6 +1186,9 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent); * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote port the @fwnode points to. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port(const struct fwnode_handle *fwnode) @@ -1095,6 +1202,9 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port); * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote endpoint the @fwnode points to. + * + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) @@ -1103,43 +1213,17 @@ fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint); -/** - * fwnode_graph_get_remote_node - get remote parent node for given port/endpoint - * @fwnode: pointer to parent fwnode_handle containing graph port/endpoint - * @port_id: identifier of the parent port node - * @endpoint_id: identifier of the endpoint node - * - * Return: Remote fwnode handle associated with remote endpoint node linked - * to @node. Use fwnode_node_put() on it when done. 
- */ -struct fwnode_handle * -fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port_id, - u32 endpoint_id) +static bool fwnode_graph_remote_available(struct fwnode_handle *ep) { - struct fwnode_handle *endpoint = NULL; - - while ((endpoint = fwnode_graph_get_next_endpoint(fwnode, endpoint))) { - struct fwnode_endpoint fwnode_ep; - struct fwnode_handle *remote; - int ret; + struct fwnode_handle *dev_node; + bool available; - ret = fwnode_graph_parse_endpoint(endpoint, &fwnode_ep); - if (ret < 0) - continue; - - if (fwnode_ep.port != port_id || fwnode_ep.id != endpoint_id) - continue; + dev_node = fwnode_graph_get_remote_port_parent(ep); + available = fwnode_device_is_available(dev_node); + fwnode_handle_put(dev_node); - remote = fwnode_graph_get_remote_port_parent(endpoint); - if (!remote) - return NULL; - - return fwnode_device_is_available(remote) ? remote : NULL; - } - - return NULL; + return available; } -EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node); /** * fwnode_graph_get_endpoint_by_id - get endpoint by port and endpoint numbers @@ -1148,42 +1232,34 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node); * @endpoint: identifier of the endpoint node under the port node * @flags: fwnode lookup flags * - * Return the fwnode handle of the local endpoint corresponding the port and - * endpoint IDs or NULL if not found. + * The caller is responsible for calling fwnode_handle_put() on the returned + * fwnode pointer. + * + * Return: the fwnode handle of the local endpoint corresponding the port and + * endpoint IDs or %NULL if not found. * * If FWNODE_GRAPH_ENDPOINT_NEXT is passed in @flags and the specified endpoint * has not been found, look for the closest endpoint ID greater than the * specified one and return the endpoint that corresponds to it, if present. * - * Do not return endpoints that belong to disabled devices, unless - * FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags. - * - * The returned endpoint needs to be released by calling fwnode_handle_put() on - * it when it is not needed any more. + * Does not return endpoints that belong to disabled devices or endpoints that + * are unconnected, unless FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags. 
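A sketch of the endpoint lookup by port/endpoint id documented above; the port and endpoint numbers are arbitrary and chosen only for illustration:

#include <linux/printk.h>
#include <linux/property.h>

/* Hypothetical: find port 0, endpoint 0, or the next higher endpoint id. */
static struct fwnode_handle *foo_get_sink(const struct fwnode_handle *fwnode)
{
	struct fwnode_endpoint info;
	struct fwnode_handle *ep;

	ep = fwnode_graph_get_endpoint_by_id(fwnode, 0, 0,
					     FWNODE_GRAPH_ENDPOINT_NEXT);
	if (!ep)
		return NULL;

	if (!fwnode_graph_parse_endpoint(ep, &info))
		pr_debug("using port %u endpoint %u\n", info.port, info.id);

	/* Caller must fwnode_handle_put() the returned endpoint. */
	return ep;
}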
*/ struct fwnode_handle * fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, u32 port, u32 endpoint, unsigned long flags) { - struct fwnode_handle *ep = NULL, *best_ep = NULL; + struct fwnode_handle *ep, *best_ep = NULL; unsigned int best_ep_id = 0; bool endpoint_next = flags & FWNODE_GRAPH_ENDPOINT_NEXT; bool enabled_only = !(flags & FWNODE_GRAPH_DEVICE_DISABLED); - while ((ep = fwnode_graph_get_next_endpoint(fwnode, ep))) { + fwnode_graph_for_each_endpoint(fwnode, ep) { struct fwnode_endpoint fwnode_ep = { 0 }; int ret; - if (enabled_only) { - struct fwnode_handle *dev_node; - bool available; - - dev_node = fwnode_graph_get_remote_port_parent(ep); - available = fwnode_device_is_available(dev_node); - fwnode_handle_put(dev_node); - if (!available) - continue; - } + if (enabled_only && !fwnode_graph_remote_available(ep)) + continue; ret = fwnode_graph_parse_endpoint(ep, &fwnode_ep); if (ret < 0) @@ -1212,16 +1288,34 @@ fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, best_ep_id = fwnode_ep.id; } - if (best_ep) - return best_ep; + return best_ep; +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id); - if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) - return fwnode_graph_get_endpoint_by_id(fwnode->secondary, port, - endpoint, flags); +/** + * fwnode_graph_get_endpoint_count - Count endpoints on a device node + * @fwnode: The node related to a device + * @flags: fwnode lookup flags + * Count endpoints in a device node. + * + * If FWNODE_GRAPH_DEVICE_DISABLED flag is specified, also unconnected endpoints + * and endpoints connected to disabled devices are counted. + */ +unsigned int fwnode_graph_get_endpoint_count(const struct fwnode_handle *fwnode, + unsigned long flags) +{ + struct fwnode_handle *ep; + unsigned int count = 0; - return NULL; + fwnode_graph_for_each_endpoint(fwnode, ep) { + if (flags & FWNODE_GRAPH_DEVICE_DISABLED || + fwnode_graph_remote_available(ep)) + count++; + } + + return count; } -EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id); +EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_count); /** * fwnode_graph_parse_endpoint - parse common endpoint node properties @@ -1241,55 +1335,75 @@ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, } EXPORT_SYMBOL(fwnode_graph_parse_endpoint); -const void *device_get_match_data(struct device *dev) +const void *device_get_match_data(const struct device *dev) { return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev); } EXPORT_SYMBOL_GPL(device_get_match_data); -static void * -fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id, - void *data, devcon_match_fn_t match) +static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode, + const char *con_id, void *data, + devcon_match_fn_t match, + void **matches, + unsigned int matches_len) { struct fwnode_handle *node; struct fwnode_handle *ep; + unsigned int count = 0; void *ret; fwnode_graph_for_each_endpoint(fwnode, ep) { + if (matches && count >= matches_len) { + fwnode_handle_put(ep); + break; + } + node = fwnode_graph_get_remote_port_parent(ep); - if (!fwnode_device_is_available(node)) + if (!fwnode_device_is_available(node)) { + fwnode_handle_put(node); continue; + } ret = match(node, con_id, data); fwnode_handle_put(node); if (ret) { - fwnode_handle_put(ep); - return ret; + if (matches) + matches[count] = ret; + count++; } } - return NULL; + return count; } -static void * -fwnode_devcon_match(struct fwnode_handle *fwnode, const char *con_id, - void *data, devcon_match_fn_t 
match) +static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode, + const char *con_id, void *data, + devcon_match_fn_t match, + void **matches, + unsigned int matches_len) { struct fwnode_handle *node; + unsigned int count = 0; + unsigned int i; void *ret; - int i; for (i = 0; ; i++) { + if (matches && count >= matches_len) + break; + node = fwnode_find_reference(fwnode, con_id, i); if (IS_ERR(node)) break; ret = match(node, NULL, data); fwnode_handle_put(node); - if (ret) - return ret; + if (ret) { + if (matches) + matches[count] = ret; + count++; + } } - return NULL; + return count; } /** @@ -1303,19 +1417,66 @@ fwnode_devcon_match(struct fwnode_handle *fwnode, const char *con_id, * device node. @match will be used to convert the connection description to * data the caller is expecting to be returned. */ -void *fwnode_connection_find_match(struct fwnode_handle *fwnode, +void *fwnode_connection_find_match(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match) { + unsigned int count; void *ret; if (!fwnode || !match) return NULL; - ret = fwnode_graph_devcon_match(fwnode, con_id, data, match); - if (ret) + count = fwnode_graph_devcon_matches(fwnode, con_id, data, match, &ret, 1); + if (count) return ret; - return fwnode_devcon_match(fwnode, con_id, data, match); + count = fwnode_devcon_matches(fwnode, con_id, data, match, &ret, 1); + return count ? ret : NULL; } EXPORT_SYMBOL_GPL(fwnode_connection_find_match); + +/** + * fwnode_connection_find_matches - Find connections from a device node + * @fwnode: Device node with the connection + * @con_id: Identifier for the connection + * @data: Data for the match function + * @match: Function to check and convert the connection description + * @matches: (Optional) array of pointers to fill with matches + * @matches_len: Length of @matches + * + * Find up to @matches_len connections with unique identifier @con_id between + * @fwnode and other device nodes. @match will be used to convert the + * connection description to data the caller is expecting to be returned + * through the @matches array. + * + * If @matches is %NULL @matches_len is ignored and the total number of resolved + * matches is returned. + * + * Return: Number of matches resolved, or negative errno. + */ +int fwnode_connection_find_matches(const struct fwnode_handle *fwnode, + const char *con_id, void *data, + devcon_match_fn_t match, + void **matches, unsigned int matches_len) +{ + unsigned int count_graph; + unsigned int count_ref; + + if (!fwnode || !match) + return -EINVAL; + + count_graph = fwnode_graph_devcon_matches(fwnode, con_id, data, match, + matches, matches_len); + + if (matches) { + matches += count_graph; + matches_len -= count_graph; + } + + count_ref = fwnode_devcon_matches(fwnode, con_id, data, match, + matches, matches_len); + + return count_graph + count_ref; +} +EXPORT_SYMBOL_GPL(fwnode_connection_find_matches); diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index 159bac6c5046..ffb2ef488298 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -4,15 +4,32 @@ # subsystems should select the appropriate symbols. 
config REGMAP - default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO) - select IRQ_DOMAIN if REGMAP_IRQ - select MDIO_BUS if REGMAP_MDIO - bool - -config REGCACHE_COMPRESSED - select LZO_COMPRESS - select LZO_DECOMPRESS bool + default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO || REGMAP_FSI) + help + Enable support for the Register Map (regmap) access API. + + Usually, this option is automatically selected when needed. + However, you may want to enable it manually for running the regmap + KUnit tests. + + If unsure, say N. + +config REGMAP_KUNIT + tristate "KUnit tests for regmap" + depends on KUNIT && REGMAP + default KUNIT_ALL_TESTS + select REGMAP_RAM + +config REGMAP_BUILD + bool "Enable regmap build" + depends on KUNIT + select REGMAP + help + This option exists purely to allow the regmap KUnit tests to + be enabled without having to enable some driver that uses + regmap due to unfortunate issues with how KUnit tests are + normally enabled. config REGMAP_AC97 tristate @@ -39,12 +56,17 @@ config REGMAP_W1 config REGMAP_MDIO tristate + select MDIO_BUS config REGMAP_MMIO tristate config REGMAP_IRQ bool + select IRQ_DOMAIN + +config REGMAP_RAM + tristate config REGMAP_SOUNDWIRE tristate @@ -65,3 +87,7 @@ config REGMAP_I3C config REGMAP_SPI_AVMM tristate depends on SPI + +config REGMAP_FSI + tristate + depends on FSI diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile index 11facb32a027..5fdd0845b45e 100644 --- a/drivers/base/regmap/Makefile +++ b/drivers/base/regmap/Makefile @@ -3,11 +3,12 @@ CFLAGS_regmap.o := -I$(src) obj-$(CONFIG_REGMAP) += regmap.o regcache.o -obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o -obj-$(CONFIG_REGCACHE_COMPRESSED) += regcache-lzo.o +obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o regcache-maple.o obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o +obj-$(CONFIG_REGMAP_KUNIT) += regmap-kunit.o obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o +obj-$(CONFIG_REGMAP_RAM) += regmap-ram.o regmap-raw-ram.o obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o @@ -20,3 +21,4 @@ obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o obj-$(CONFIG_REGMAP_I3C) += regmap-i3c.o obj-$(CONFIG_REGMAP_SPI_AVMM) += regmap-spi-avmm.o obj-$(CONFIG_REGMAP_MDIO) += regmap-mdio.o +obj-$(CONFIG_REGMAP_FSI) += regmap-fsi.o diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 0097696c31de..1477329410ec 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -32,6 +32,7 @@ struct regmap_format { size_t reg_bytes; size_t pad_bytes; size_t val_bytes; + s8 reg_shift; void (*format_write)(struct regmap *map, unsigned int reg, unsigned int val); void (*format_reg)(void *buf, unsigned int reg, unsigned int shift); @@ -53,11 +54,17 @@ struct regmap { spinlock_t spinlock; unsigned long spinlock_flags; }; + struct { + raw_spinlock_t raw_spinlock; + unsigned long raw_spinlock_flags; + }; }; + struct lock_class_key *lock_key; regmap_lock lock; regmap_unlock unlock; void *lock_arg; /* This is passed to lock/unlock functions */ gfp_t alloc_flags; + unsigned int reg_base; struct device 
*dev; /* Device we do I/O on */ void *work_buf; /* Scratch buffer used to format I/O */ @@ -66,12 +73,12 @@ struct regmap { void *bus_context; const char *name; - bool async; spinlock_t async_lock; wait_queue_head_t async_waitq; struct list_head async_list; struct list_head async_free; int async_ret; + bool async; #ifdef CONFIG_DEBUG_FS bool debugfs_disable; @@ -87,6 +94,7 @@ struct regmap { #endif unsigned int max_register; + bool max_register_is_set; bool (*writeable_reg)(struct device *dev, unsigned int reg); bool (*readable_reg)(struct device *dev, unsigned int reg); bool (*volatile_reg)(struct device *dev, unsigned int reg); @@ -104,8 +112,10 @@ struct regmap { int (*reg_write)(void *context, unsigned int reg, unsigned int val); int (*reg_update_bits)(void *context, unsigned int reg, unsigned int mask, unsigned int val); - - bool defer_caching; + /* Bulk read/write */ + int (*read)(void *context, const void *reg_buf, size_t reg_size, + void *val_buf, size_t val_size); + int (*write)(void *context, const void *data, size_t count); unsigned long read_flag_mask; unsigned long write_flag_mask; @@ -115,6 +125,11 @@ struct regmap { int reg_stride; int reg_stride_order; + bool defer_caching; + + /* If set, will always write field to HW. */ + bool force_write_field; + /* regcache specific members */ const struct regcache_ops *cache_ops; enum regcache_type cache_type; @@ -146,6 +161,9 @@ struct regmap { struct reg_sequence *patch; int patch_regs; + /* if set, the regmap core can sleep */ + bool can_sleep; + /* if set, converts bulk read to single read */ bool use_single_read; /* if set, converts bulk write to single write */ @@ -161,9 +179,6 @@ struct regmap { void *selector_work_buf; /* Scratch buffer used for selector */ struct hwspinlock *hwlock; - - /* if set, the regmap core can sleep */ - bool can_sleep; }; struct regcache_ops { @@ -171,6 +186,7 @@ struct regcache_ops { enum regcache_type type; int (*init)(struct regmap *map); int (*exit)(struct regmap *map); + int (*populate)(struct regmap *map); #ifdef CONFIG_DEBUG_FS void (*debugfs_init)(struct regmap *map); #endif @@ -247,6 +263,8 @@ int regcache_sync_block(struct regmap *map, void *block, unsigned long *cache_present, unsigned int block_base, unsigned int start, unsigned int end); +bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg, + unsigned int val); static inline const void *regcache_get_val_addr(struct regmap *map, const void *base, @@ -257,9 +275,10 @@ static inline const void *regcache_get_val_addr(struct regmap *map, unsigned int regcache_get_val(struct regmap *map, const void *base, unsigned int idx); -bool regcache_set_val(struct regmap *map, void *base, unsigned int idx, +void regcache_set_val(struct regmap *map, void *base, unsigned int idx, unsigned int val); int regcache_lookup_reg(struct regmap *map, unsigned int reg); +int regcache_sync_val(struct regmap *map, unsigned int reg, unsigned int val); int _regmap_raw_write(struct regmap *map, unsigned int reg, const void *val, size_t val_len, bool noinc); @@ -270,8 +289,9 @@ enum regmap_endian regmap_get_val_endian(struct device *dev, const struct regmap_bus *bus, const struct regmap_config *config); +extern struct regcache_ops regcache_flat_sparse_ops; extern struct regcache_ops regcache_rbtree_ops; -extern struct regcache_ops regcache_lzo_ops; +extern struct regcache_ops regcache_maple_ops; extern struct regcache_ops regcache_flat_ops; static inline const char *regmap_name(const struct regmap *map) @@ -297,4 +317,34 @@ static inline unsigned int 
regcache_get_index_by_order(const struct regmap *map, return reg >> map->reg_stride_order; } +struct regmap_ram_data { + unsigned int *vals; /* Allocatd by caller */ + bool *read; + bool *written; + enum regmap_endian reg_endian; + bool (*noinc_reg)(struct regmap_ram_data *data, unsigned int reg); +}; + +/* + * Create a test register map with data stored in RAM, not intended + * for practical use. + */ +struct regmap *__regmap_init_ram(struct device *dev, + const struct regmap_config *config, + struct regmap_ram_data *data, + struct lock_class_key *lock_key, + const char *lock_name); + +#define regmap_init_ram(dev, config, data) \ + __regmap_lockdep_wrapper(__regmap_init_ram, #dev, dev, config, data) + +struct regmap *__regmap_init_raw_ram(struct device *dev, + const struct regmap_config *config, + struct regmap_ram_data *data, + struct lock_class_key *lock_key, + const char *lock_name); + +#define regmap_init_raw_ram(dev, config, data) \ + __regmap_lockdep_wrapper(__regmap_init_raw_ram, #dev, dev, config, data) + #endif diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c index b7e4b2464102..53cc59c84e2f 100644 --- a/drivers/base/regmap/regcache-flat.c +++ b/drivers/base/regmap/regcache-flat.c @@ -6,7 +6,11 @@ // // Author: Mark Brown <broonie@opensource.wolfsonmicro.com> +#include <linux/bitmap.h> +#include <linux/bitops.h> #include <linux/device.h> +#include <linux/limits.h> +#include <linux/overflow.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -18,46 +22,92 @@ static inline unsigned int regcache_flat_get_index(const struct regmap *map, return regcache_get_index_by_order(map, reg); } +struct regcache_flat_data { + unsigned long *valid; + unsigned int data[]; +}; + static int regcache_flat_init(struct regmap *map) { - int i; - unsigned int *cache; + unsigned int cache_size; + struct regcache_flat_data *cache; - if (!map || map->reg_stride_order < 0 || !map->max_register) + if (!map || map->reg_stride_order < 0 || !map->max_register_is_set) return -EINVAL; - map->cache = kcalloc(regcache_flat_get_index(map, map->max_register) - + 1, sizeof(unsigned int), GFP_KERNEL); - if (!map->cache) + cache_size = regcache_flat_get_index(map, map->max_register) + 1; + cache = kzalloc(struct_size(cache, data, cache_size), map->alloc_flags); + if (!cache) return -ENOMEM; - cache = map->cache; + cache->valid = bitmap_zalloc(cache_size, map->alloc_flags); + if (!cache->valid) + goto err_free; + + map->cache = cache; + + return 0; + +err_free: + kfree(cache); + return -ENOMEM; +} + +static int regcache_flat_exit(struct regmap *map) +{ + struct regcache_flat_data *cache = map->cache; + + if (cache) + bitmap_free(cache->valid); + + kfree(cache); + map->cache = NULL; + + return 0; +} + +static int regcache_flat_populate(struct regmap *map) +{ + struct regcache_flat_data *cache = map->cache; + unsigned int i; for (i = 0; i < map->num_reg_defaults; i++) { unsigned int reg = map->reg_defaults[i].reg; unsigned int index = regcache_flat_get_index(map, reg); - cache[index] = map->reg_defaults[i].def; + cache->data[index] = map->reg_defaults[i].def; + __set_bit(index, cache->valid); } return 0; } -static int regcache_flat_exit(struct regmap *map) +static int regcache_flat_read(struct regmap *map, + unsigned int reg, unsigned int *value) { - kfree(map->cache); - map->cache = NULL; + struct regcache_flat_data *cache = map->cache; + unsigned int index = regcache_flat_get_index(map, reg); + + /* legacy behavior: ignore validity, but warn the user */ + if 
(unlikely(!test_bit(index, cache->valid))) + dev_warn_once(map->dev, + "using zero-initialized flat cache, this may cause unexpected behavior"); + + *value = cache->data[index]; return 0; } -static int regcache_flat_read(struct regmap *map, - unsigned int reg, unsigned int *value) +static int regcache_flat_sparse_read(struct regmap *map, + unsigned int reg, unsigned int *value) { - unsigned int *cache = map->cache; + struct regcache_flat_data *cache = map->cache; unsigned int index = regcache_flat_get_index(map, reg); - *value = cache[index]; + if (unlikely(!test_bit(index, cache->valid))) + return -ENOENT; + + *value = cache->data[index]; return 0; } @@ -65,10 +115,23 @@ static int regcache_flat_read(struct regmap *map, static int regcache_flat_write(struct regmap *map, unsigned int reg, unsigned int value) { - unsigned int *cache = map->cache; + struct regcache_flat_data *cache = map->cache; unsigned int index = regcache_flat_get_index(map, reg); - cache[index] = value; + cache->data[index] = value; + __set_bit(index, cache->valid); + + return 0; +} + +static int regcache_flat_drop(struct regmap *map, unsigned int min, + unsigned int max) +{ + struct regcache_flat_data *cache = map->cache; + unsigned int bitmap_min = regcache_flat_get_index(map, min); + unsigned int bitmap_max = regcache_flat_get_index(map, max); + + bitmap_clear(cache->valid, bitmap_min, bitmap_max + 1 - bitmap_min); return 0; } @@ -78,6 +141,18 @@ struct regcache_ops regcache_flat_ops = { .name = "flat", .init = regcache_flat_init, .exit = regcache_flat_exit, + .populate = regcache_flat_populate, .read = regcache_flat_read, .write = regcache_flat_write, }; + +struct regcache_ops regcache_flat_sparse_ops = { + .type = REGCACHE_FLAT_S, + .name = "flat-sparse", + .init = regcache_flat_init, + .exit = regcache_flat_exit, + .populate = regcache_flat_populate, + .read = regcache_flat_sparse_read, + .write = regcache_flat_write, + .drop = regcache_flat_drop, +}; diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c deleted file mode 100644 index 7886303eb026..000000000000 --- a/drivers/base/regmap/regcache-lzo.c +++ /dev/null @@ -1,368 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// -// Register cache access API - LZO caching support -// -// Copyright 2011 Wolfson Microelectronics plc -// -// Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com> - -#include <linux/device.h> -#include <linux/lzo.h> -#include <linux/slab.h> - -#include "internal.h" - -static int regcache_lzo_exit(struct regmap *map); - -struct regcache_lzo_ctx { - void *wmem; - void *dst; - const void *src; - size_t src_len; - size_t dst_len; - size_t decompressed_size; - unsigned long *sync_bmp; - int sync_bmp_nbits; -}; - -#define LZO_BLOCK_NUM 8 -static int regcache_lzo_block_count(struct regmap *map) -{ - return LZO_BLOCK_NUM; -} - -static int regcache_lzo_prepare(struct regcache_lzo_ctx *lzo_ctx) -{ - lzo_ctx->wmem = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!lzo_ctx->wmem) - return -ENOMEM; - return 0; -} - -static int regcache_lzo_compress(struct regcache_lzo_ctx *lzo_ctx) -{ - size_t compress_size; - int ret; - - ret = lzo1x_1_compress(lzo_ctx->src, lzo_ctx->src_len, - lzo_ctx->dst, &compress_size, lzo_ctx->wmem); - if (ret != LZO_E_OK || compress_size > lzo_ctx->dst_len) - return -EINVAL; - lzo_ctx->dst_len = compress_size; - return 0; -} - -static int regcache_lzo_decompress(struct regcache_lzo_ctx *lzo_ctx) -{ - size_t dst_len; - int ret; - - dst_len = lzo_ctx->dst_len; - ret = 
lzo1x_decompress_safe(lzo_ctx->src, lzo_ctx->src_len, - lzo_ctx->dst, &dst_len); - if (ret != LZO_E_OK || dst_len != lzo_ctx->dst_len) - return -EINVAL; - return 0; -} - -static int regcache_lzo_compress_cache_block(struct regmap *map, - struct regcache_lzo_ctx *lzo_ctx) -{ - int ret; - - lzo_ctx->dst_len = lzo1x_worst_compress(PAGE_SIZE); - lzo_ctx->dst = kmalloc(lzo_ctx->dst_len, GFP_KERNEL); - if (!lzo_ctx->dst) { - lzo_ctx->dst_len = 0; - return -ENOMEM; - } - - ret = regcache_lzo_compress(lzo_ctx); - if (ret < 0) - return ret; - return 0; -} - -static int regcache_lzo_decompress_cache_block(struct regmap *map, - struct regcache_lzo_ctx *lzo_ctx) -{ - int ret; - - lzo_ctx->dst_len = lzo_ctx->decompressed_size; - lzo_ctx->dst = kmalloc(lzo_ctx->dst_len, GFP_KERNEL); - if (!lzo_ctx->dst) { - lzo_ctx->dst_len = 0; - return -ENOMEM; - } - - ret = regcache_lzo_decompress(lzo_ctx); - if (ret < 0) - return ret; - return 0; -} - -static inline int regcache_lzo_get_blkindex(struct regmap *map, - unsigned int reg) -{ - return ((reg / map->reg_stride) * map->cache_word_size) / - DIV_ROUND_UP(map->cache_size_raw, - regcache_lzo_block_count(map)); -} - -static inline int regcache_lzo_get_blkpos(struct regmap *map, - unsigned int reg) -{ - return (reg / map->reg_stride) % - (DIV_ROUND_UP(map->cache_size_raw, - regcache_lzo_block_count(map)) / - map->cache_word_size); -} - -static inline int regcache_lzo_get_blksize(struct regmap *map) -{ - return DIV_ROUND_UP(map->cache_size_raw, - regcache_lzo_block_count(map)); -} - -static int regcache_lzo_init(struct regmap *map) -{ - struct regcache_lzo_ctx **lzo_blocks; - size_t bmp_size; - int ret, i, blksize, blkcount; - const char *p, *end; - unsigned long *sync_bmp; - - ret = 0; - - blkcount = regcache_lzo_block_count(map); - map->cache = kcalloc(blkcount, sizeof(*lzo_blocks), - GFP_KERNEL); - if (!map->cache) - return -ENOMEM; - lzo_blocks = map->cache; - - /* - * allocate a bitmap to be used when syncing the cache with - * the hardware. Each time a register is modified, the corresponding - * bit is set in the bitmap, so we know that we have to sync - * that register. 
- */ - bmp_size = map->num_reg_defaults_raw; - sync_bmp = bitmap_zalloc(bmp_size, GFP_KERNEL); - if (!sync_bmp) { - ret = -ENOMEM; - goto err; - } - - /* allocate the lzo blocks and initialize them */ - for (i = 0; i < blkcount; i++) { - lzo_blocks[i] = kzalloc(sizeof **lzo_blocks, - GFP_KERNEL); - if (!lzo_blocks[i]) { - bitmap_free(sync_bmp); - ret = -ENOMEM; - goto err; - } - lzo_blocks[i]->sync_bmp = sync_bmp; - lzo_blocks[i]->sync_bmp_nbits = bmp_size; - /* alloc the working space for the compressed block */ - ret = regcache_lzo_prepare(lzo_blocks[i]); - if (ret < 0) - goto err; - } - - blksize = regcache_lzo_get_blksize(map); - p = map->reg_defaults_raw; - end = map->reg_defaults_raw + map->cache_size_raw; - /* compress the register map and fill the lzo blocks */ - for (i = 0; i < blkcount; i++, p += blksize) { - lzo_blocks[i]->src = p; - if (p + blksize > end) - lzo_blocks[i]->src_len = end - p; - else - lzo_blocks[i]->src_len = blksize; - ret = regcache_lzo_compress_cache_block(map, - lzo_blocks[i]); - if (ret < 0) - goto err; - lzo_blocks[i]->decompressed_size = - lzo_blocks[i]->src_len; - } - - return 0; -err: - regcache_lzo_exit(map); - return ret; -} - -static int regcache_lzo_exit(struct regmap *map) -{ - struct regcache_lzo_ctx **lzo_blocks; - int i, blkcount; - - lzo_blocks = map->cache; - if (!lzo_blocks) - return 0; - - blkcount = regcache_lzo_block_count(map); - /* - * the pointer to the bitmap used for syncing the cache - * is shared amongst all lzo_blocks. Ensure it is freed - * only once. - */ - if (lzo_blocks[0]) - bitmap_free(lzo_blocks[0]->sync_bmp); - for (i = 0; i < blkcount; i++) { - if (lzo_blocks[i]) { - kfree(lzo_blocks[i]->wmem); - kfree(lzo_blocks[i]->dst); - } - /* each lzo_block is a pointer returned by kmalloc or NULL */ - kfree(lzo_blocks[i]); - } - kfree(lzo_blocks); - map->cache = NULL; - return 0; -} - -static int regcache_lzo_read(struct regmap *map, - unsigned int reg, unsigned int *value) -{ - struct regcache_lzo_ctx *lzo_block, **lzo_blocks; - int ret, blkindex, blkpos; - size_t tmp_dst_len; - void *tmp_dst; - - /* index of the compressed lzo block */ - blkindex = regcache_lzo_get_blkindex(map, reg); - /* register index within the decompressed block */ - blkpos = regcache_lzo_get_blkpos(map, reg); - lzo_blocks = map->cache; - lzo_block = lzo_blocks[blkindex]; - - /* save the pointer and length of the compressed block */ - tmp_dst = lzo_block->dst; - tmp_dst_len = lzo_block->dst_len; - - /* prepare the source to be the compressed block */ - lzo_block->src = lzo_block->dst; - lzo_block->src_len = lzo_block->dst_len; - - /* decompress the block */ - ret = regcache_lzo_decompress_cache_block(map, lzo_block); - if (ret >= 0) - /* fetch the value from the cache */ - *value = regcache_get_val(map, lzo_block->dst, blkpos); - - kfree(lzo_block->dst); - /* restore the pointer and length of the compressed block */ - lzo_block->dst = tmp_dst; - lzo_block->dst_len = tmp_dst_len; - - return ret; -} - -static int regcache_lzo_write(struct regmap *map, - unsigned int reg, unsigned int value) -{ - struct regcache_lzo_ctx *lzo_block, **lzo_blocks; - int ret, blkindex, blkpos; - size_t tmp_dst_len; - void *tmp_dst; - - /* index of the compressed lzo block */ - blkindex = regcache_lzo_get_blkindex(map, reg); - /* register index within the decompressed block */ - blkpos = regcache_lzo_get_blkpos(map, reg); - lzo_blocks = map->cache; - lzo_block = lzo_blocks[blkindex]; - - /* save the pointer and length of the compressed block */ - tmp_dst = lzo_block->dst; - 
tmp_dst_len = lzo_block->dst_len; - - /* prepare the source to be the compressed block */ - lzo_block->src = lzo_block->dst; - lzo_block->src_len = lzo_block->dst_len; - - /* decompress the block */ - ret = regcache_lzo_decompress_cache_block(map, lzo_block); - if (ret < 0) { - kfree(lzo_block->dst); - goto out; - } - - /* write the new value to the cache */ - if (regcache_set_val(map, lzo_block->dst, blkpos, value)) { - kfree(lzo_block->dst); - goto out; - } - - /* prepare the source to be the decompressed block */ - lzo_block->src = lzo_block->dst; - lzo_block->src_len = lzo_block->dst_len; - - /* compress the block */ - ret = regcache_lzo_compress_cache_block(map, lzo_block); - if (ret < 0) { - kfree(lzo_block->dst); - kfree(lzo_block->src); - goto out; - } - - /* set the bit so we know we have to sync this register */ - set_bit(reg / map->reg_stride, lzo_block->sync_bmp); - kfree(tmp_dst); - kfree(lzo_block->src); - return 0; -out: - lzo_block->dst = tmp_dst; - lzo_block->dst_len = tmp_dst_len; - return ret; -} - -static int regcache_lzo_sync(struct regmap *map, unsigned int min, - unsigned int max) -{ - struct regcache_lzo_ctx **lzo_blocks; - unsigned int val; - int i; - int ret; - - lzo_blocks = map->cache; - i = min; - for_each_set_bit_from(i, lzo_blocks[0]->sync_bmp, - lzo_blocks[0]->sync_bmp_nbits) { - if (i > max) - continue; - - ret = regcache_read(map, i, &val); - if (ret) - return ret; - - /* Is this the hardware default? If so skip. */ - ret = regcache_lookup_reg(map, i); - if (ret > 0 && val == map->reg_defaults[ret].def) - continue; - - map->cache_bypass = true; - ret = _regmap_write(map, i, val); - map->cache_bypass = false; - if (ret) - return ret; - dev_dbg(map->dev, "Synced register %#x, value %#x\n", - i, val); - } - - return 0; -} - -struct regcache_ops regcache_lzo_ops = { - .type = REGCACHE_COMPRESSED, - .name = "lzo", - .init = regcache_lzo_init, - .exit = regcache_lzo_exit, - .read = regcache_lzo_read, - .write = regcache_lzo_write, - .sync = regcache_lzo_sync -}; diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c new file mode 100644 index 000000000000..ca1c72b68f31 --- /dev/null +++ b/drivers/base/regmap/regcache-maple.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Register cache access API - maple tree based cache +// +// Copyright 2023 Arm, Ltd +// +// Author: Mark Brown <broonie@kernel.org> + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/maple_tree.h> +#include <linux/slab.h> + +#include "internal.h" + +static int regcache_maple_read(struct regmap *map, + unsigned int reg, unsigned int *value) +{ + struct maple_tree *mt = map->cache; + MA_STATE(mas, mt, reg, reg); + unsigned long *entry; + + rcu_read_lock(); + + entry = mas_walk(&mas); + if (!entry) { + rcu_read_unlock(); + return -ENOENT; + } + + *value = entry[reg - mas.index]; + + rcu_read_unlock(); + + return 0; +} + +static int regcache_maple_write(struct regmap *map, unsigned int reg, + unsigned int val) +{ + struct maple_tree *mt = map->cache; + MA_STATE(mas, mt, reg, reg); + unsigned long *entry, *upper, *lower; + unsigned long index, last; + size_t lower_sz, upper_sz; + int ret; + + rcu_read_lock(); + + entry = mas_walk(&mas); + if (entry) { + entry[reg - mas.index] = val; + rcu_read_unlock(); + return 0; + } + + /* Any adjacent entries to extend/merge? 
*/ + mas_set_range(&mas, reg - 1, reg + 1); + index = reg; + last = reg; + + lower = mas_find(&mas, reg - 1); + if (lower) { + index = mas.index; + lower_sz = (mas.last - mas.index + 1) * sizeof(unsigned long); + } + + upper = mas_find(&mas, reg + 1); + if (upper) { + last = mas.last; + upper_sz = (mas.last - mas.index + 1) * sizeof(unsigned long); + } + + rcu_read_unlock(); + + entry = kmalloc_array(last - index + 1, sizeof(*entry), map->alloc_flags); + if (!entry) + return -ENOMEM; + + if (lower) + memcpy(entry, lower, lower_sz); + entry[reg - index] = val; + if (upper) + memcpy(&entry[reg - index + 1], upper, upper_sz); + + /* + * This is safe because the regmap lock means the Maple lock + * is redundant, but we need to take it due to lockdep asserts + * in the maple tree code. + */ + mas_lock(&mas); + + mas_set_range(&mas, index, last); + ret = mas_store_gfp(&mas, entry, map->alloc_flags); + + mas_unlock(&mas); + + if (ret == 0) { + kfree(lower); + kfree(upper); + } + + return ret; +} + +static int regcache_maple_drop(struct regmap *map, unsigned int min, + unsigned int max) +{ + struct maple_tree *mt = map->cache; + MA_STATE(mas, mt, min, max); + unsigned long *entry, *lower, *upper; + /* initialized to work around false-positive -Wuninitialized warning */ + unsigned long lower_index = 0, lower_last = 0; + unsigned long upper_index, upper_last; + int ret = 0; + + lower = NULL; + upper = NULL; + + mas_lock(&mas); + + mas_for_each(&mas, entry, max) { + /* + * This is safe because the regmap lock means the + * Maple lock is redundant, but we need to take it due + * to lockdep asserts in the maple tree code. + */ + mas_unlock(&mas); + + /* Do we need to save any of this entry? */ + if (mas.index < min) { + lower_index = mas.index; + lower_last = min -1; + + lower = kmemdup_array(entry, + min - mas.index, sizeof(*lower), + map->alloc_flags); + if (!lower) { + ret = -ENOMEM; + goto out_unlocked; + } + } + + if (mas.last > max) { + upper_index = max + 1; + upper_last = mas.last; + + upper = kmemdup_array(&entry[max - mas.index + 1], + mas.last - max, sizeof(*upper), + map->alloc_flags); + if (!upper) { + ret = -ENOMEM; + goto out_unlocked; + } + } + + kfree(entry); + mas_lock(&mas); + mas_erase(&mas); + + /* Insert new nodes with the saved data */ + if (lower) { + mas_set_range(&mas, lower_index, lower_last); + ret = mas_store_gfp(&mas, lower, map->alloc_flags); + if (ret != 0) + goto out; + lower = NULL; + } + + if (upper) { + mas_set_range(&mas, upper_index, upper_last); + ret = mas_store_gfp(&mas, upper, map->alloc_flags); + if (ret != 0) + goto out; + upper = NULL; + } + } + +out: + mas_unlock(&mas); +out_unlocked: + kfree(lower); + kfree(upper); + + return ret; +} + +static int regcache_maple_sync_block(struct regmap *map, unsigned long *entry, + struct ma_state *mas, + unsigned int min, unsigned int max) +{ + void *buf; + unsigned long r; + size_t val_bytes = map->format.val_bytes; + int ret = 0; + + mas_pause(mas); + rcu_read_unlock(); + + /* + * Use a raw write if writing more than one register to a + * device that supports raw writes to reduce transaction + * overheads. 
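 *
 * (Illustrative example, register numbers are hypothetical: syncing four
 * dirty 16-bit registers 0x20..0x23 renders their cached values into a
 * single 8-byte buffer and issues one raw write starting at 0x20 instead
 * of four individual register writes.)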
+ */ + if (max - min > 1 && regmap_can_raw_write(map)) { + buf = kmalloc_array(max - min, val_bytes, map->alloc_flags); + if (!buf) { + ret = -ENOMEM; + goto out; + } + + /* Render the data for a raw write */ + for (r = min; r < max; r++) { + regcache_set_val(map, buf, r - min, + entry[r - mas->index]); + } + + ret = _regmap_raw_write(map, min, buf, (max - min) * val_bytes, + false); + + kfree(buf); + } else { + for (r = min; r < max; r++) { + ret = _regmap_write(map, r, + entry[r - mas->index]); + if (ret != 0) + goto out; + } + } + +out: + rcu_read_lock(); + + return ret; +} + +static int regcache_maple_sync(struct regmap *map, unsigned int min, + unsigned int max) +{ + struct maple_tree *mt = map->cache; + unsigned long *entry; + MA_STATE(mas, mt, min, max); + unsigned long lmin = min; + unsigned long lmax = max; + unsigned int r, v, sync_start; + int ret = 0; + bool sync_needed = false; + + map->cache_bypass = true; + + rcu_read_lock(); + + mas_for_each(&mas, entry, max) { + for (r = max(mas.index, lmin); r <= min(mas.last, lmax); r++) { + v = entry[r - mas.index]; + + if (regcache_reg_needs_sync(map, r, v)) { + if (!sync_needed) { + sync_start = r; + sync_needed = true; + } + continue; + } + + if (!sync_needed) + continue; + + ret = regcache_maple_sync_block(map, entry, &mas, + sync_start, r); + if (ret != 0) + goto out; + sync_needed = false; + } + + if (sync_needed) { + ret = regcache_maple_sync_block(map, entry, &mas, + sync_start, r); + if (ret != 0) + goto out; + sync_needed = false; + } + } + +out: + rcu_read_unlock(); + + map->cache_bypass = false; + + return ret; +} + +static int regcache_maple_init(struct regmap *map) +{ + struct maple_tree *mt; + + mt = kmalloc(sizeof(*mt), map->alloc_flags); + if (!mt) + return -ENOMEM; + map->cache = mt; + + mt_init(mt); + + if (!mt_external_lock(mt) && map->lock_key) + lockdep_set_class_and_subclass(&mt->ma_lock, map->lock_key, 1); + + return 0; +} + +static int regcache_maple_exit(struct regmap *map) +{ + struct maple_tree *mt = map->cache; + MA_STATE(mas, mt, 0, UINT_MAX); + unsigned int *entry; + + /* if we've already been called then just return */ + if (!mt) + return 0; + + mas_lock(&mas); + mas_for_each(&mas, entry, UINT_MAX) + kfree(entry); + __mt_destroy(mt); + mas_unlock(&mas); + + kfree(mt); + map->cache = NULL; + + return 0; +} + +static int regcache_maple_insert_block(struct regmap *map, int first, + int last) +{ + struct maple_tree *mt = map->cache; + MA_STATE(mas, mt, first, last); + unsigned long *entry; + int i, ret; + + entry = kmalloc_array(last - first + 1, sizeof(*entry), map->alloc_flags); + if (!entry) + return -ENOMEM; + + for (i = 0; i < last - first + 1; i++) + entry[i] = map->reg_defaults[first + i].def; + + mas_lock(&mas); + + mas_set_range(&mas, map->reg_defaults[first].reg, + map->reg_defaults[last].reg); + ret = mas_store_gfp(&mas, entry, map->alloc_flags); + + mas_unlock(&mas); + + if (ret) + kfree(entry); + + return ret; +} + +static int regcache_maple_populate(struct regmap *map) +{ + int i; + int ret; + int range_start; + + range_start = 0; + + /* Scan for ranges of contiguous registers */ + for (i = 1; i < map->num_reg_defaults; i++) { + if (map->reg_defaults[i].reg != + map->reg_defaults[i - 1].reg + 1) { + ret = regcache_maple_insert_block(map, range_start, + i - 1); + if (ret != 0) + return ret; + + range_start = i; + } + } + + /* Add the last block */ + return regcache_maple_insert_block(map, range_start, map->num_reg_defaults - 1); +} + +struct regcache_ops regcache_maple_ops = { + .type = 
REGCACHE_MAPLE, + .name = "maple", + .init = regcache_maple_init, + .exit = regcache_maple_exit, + .populate = regcache_maple_populate, + .read = regcache_maple_read, + .write = regcache_maple_write, + .drop = regcache_maple_drop, + .sync = regcache_maple_sync, +}; diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbf..3344b82c3799 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -22,7 +22,7 @@ struct regcache_rbtree_node { /* block of adjacent registers */ void *block; /* Which registers are present */ - long *cache_present; + unsigned long *cache_present; /* base register handled by this block */ unsigned int base_reg; /* number of registers available in the block */ @@ -184,10 +184,8 @@ static void rbtree_debugfs_init(struct regmap *map) static int regcache_rbtree_init(struct regmap *map) { struct regcache_rbtree_ctx *rbtree_ctx; - int i; - int ret; - map->cache = kmalloc(sizeof *rbtree_ctx, GFP_KERNEL); + map->cache = kmalloc(sizeof *rbtree_ctx, map->alloc_flags); if (!map->cache) return -ENOMEM; @@ -195,19 +193,7 @@ static int regcache_rbtree_init(struct regmap *map) rbtree_ctx->root = RB_ROOT; rbtree_ctx->cached_rbnode = NULL; - for (i = 0; i < map->num_reg_defaults; i++) { - ret = regcache_rbtree_write(map, - map->reg_defaults[i].reg, - map->reg_defaults[i].def); - if (ret) - goto err; - } - return 0; - -err: - regcache_rbtree_exit(map); - return ret; } static int regcache_rbtree_exit(struct regmap *map) @@ -239,6 +225,22 @@ static int regcache_rbtree_exit(struct regmap *map) return 0; } +static int regcache_rbtree_populate(struct regmap *map) +{ + unsigned int i; + int ret; + + for (i = 0; i < map->num_reg_defaults; i++) { + ret = regcache_rbtree_write(map, + map->reg_defaults[i].reg, + map->reg_defaults[i].def); + if (ret) + return ret; + } + + return 0; +} + static int regcache_rbtree_read(struct regmap *map, unsigned int reg, unsigned int *value) { @@ -275,20 +277,18 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, pos = (reg - base_reg) / map->reg_stride; offset = (rbnode->base_reg - base_reg) / map->reg_stride; - blk = krealloc(rbnode->block, - blklen * map->cache_word_size, - GFP_KERNEL); + blk = krealloc_array(rbnode->block, blklen, map->cache_word_size, map->alloc_flags); if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { - present = krealloc(rbnode->cache_present, - BITS_TO_LONGS(blklen) * sizeof(*present), - GFP_KERNEL); - if (!present) { - kfree(blk); + present = krealloc_array(rbnode->cache_present, + BITS_TO_LONGS(blklen), sizeof(*present), + map->alloc_flags); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; @@ -321,7 +320,7 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg) const struct regmap_range *range; int i; - rbnode = kzalloc(sizeof(*rbnode), GFP_KERNEL); + rbnode = kzalloc(sizeof(*rbnode), map->alloc_flags); if (!rbnode) return NULL; @@ -347,13 +346,13 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg) } rbnode->block = kmalloc_array(rbnode->blklen, map->cache_word_size, - GFP_KERNEL); + 
map->alloc_flags); if (!rbnode->block) goto err_free; rbnode->cache_present = kcalloc(BITS_TO_LONGS(rbnode->blklen), sizeof(*rbnode->cache_present), - GFP_KERNEL); + map->alloc_flags); if (!rbnode->cache_present) goto err_free_block; @@ -454,7 +453,8 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, if (!rbnode) return -ENOMEM; regcache_rbtree_set_register(map, rbnode, - reg - rbnode->base_reg, value); + (reg - rbnode->base_reg) / map->reg_stride, + value); regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode); rbtree_ctx->cached_rbnode = rbnode; } @@ -472,6 +472,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min, unsigned int start, end; int ret; + map->async = true; + rbtree_ctx = map->cache; for (node = rb_first(&rbtree_ctx->root); node; node = rb_next(node)) { rbnode = rb_entry(node, struct regcache_rbtree_node, node); @@ -500,6 +502,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min, return ret; } + map->async = false; + return regmap_async_complete(map); } @@ -544,6 +548,7 @@ struct regcache_ops regcache_rbtree_ops = { .name = "rbtree", .init = regcache_rbtree_init, .exit = regcache_rbtree_exit, + .populate = regcache_rbtree_populate, #ifdef CONFIG_DEBUG_FS .debugfs_init = rbtree_debugfs_init, #endif diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index f2469d3435ca..319c342bf5a0 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -16,13 +16,32 @@ #include "internal.h" static const struct regcache_ops *cache_types[] = { + &regcache_flat_sparse_ops, &regcache_rbtree_ops, -#if IS_ENABLED(CONFIG_REGCACHE_COMPRESSED) - &regcache_lzo_ops, -#endif + &regcache_maple_ops, &regcache_flat_ops, }; +static int regcache_defaults_cmp(const void *a, const void *b) +{ + const struct reg_default *x = a; + const struct reg_default *y = b; + + if (x->reg > y->reg) + return 1; + else if (x->reg < y->reg) + return -1; + else + return 0; +} + +void regcache_sort_defaults(struct reg_default *defaults, unsigned int ndefaults) +{ + sort(defaults, ndefaults, sizeof(*defaults), + regcache_defaults_cmp, NULL); +} +EXPORT_SYMBOL_GPL(regcache_sort_defaults); + static int regcache_hw_init(struct regmap *map) { int i, j; @@ -133,6 +152,12 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) return -EINVAL; } + if (config->num_reg_defaults && !config->reg_defaults) { + dev_err(map->dev, + "Register defaults number are set without the reg!\n"); + return -EINVAL; + } + for (i = 0; i < config->num_reg_defaults; i++) if (config->reg_defaults[i].reg % map->reg_stride) return -EINVAL; @@ -142,7 +167,7 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) break; if (i == ARRAY_SIZE(cache_types)) { - dev_err(map->dev, "Could not match compress type: %d\n", + dev_err(map->dev, "Could not match cache type: %d\n", map->cache_type); return -EINVAL; } @@ -150,7 +175,7 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) map->num_reg_defaults = config->num_reg_defaults; map->num_reg_defaults_raw = config->num_reg_defaults_raw; map->reg_defaults_raw = config->reg_defaults_raw; - map->cache_word_size = DIV_ROUND_UP(config->val_bits, 8); + map->cache_word_size = BITS_TO_BYTES(config->val_bits); map->cache_size_raw = map->cache_word_size * config->num_reg_defaults_raw; map->cache = NULL; @@ -166,8 +191,8 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) * a copy of it. 
*/ if (config->reg_defaults) { - tmp_buf = kmemdup(config->reg_defaults, map->num_reg_defaults * - sizeof(struct reg_default), GFP_KERNEL); + tmp_buf = kmemdup_array(config->reg_defaults, map->num_reg_defaults, + sizeof(*map->reg_defaults), GFP_KERNEL); if (!tmp_buf) return -ENOMEM; map->reg_defaults = tmp_buf; @@ -183,18 +208,38 @@ int regcache_init(struct regmap *map, const struct regmap_config *config) return 0; } - if (!map->max_register) - map->max_register = map->num_reg_defaults_raw; + if (!map->max_register_is_set && map->num_reg_defaults_raw) { + map->max_register = (map->num_reg_defaults_raw - 1) * map->reg_stride; + map->max_register_is_set = true; + } if (map->cache_ops->init) { dev_dbg(map->dev, "Initializing %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); ret = map->cache_ops->init(map); + map->unlock(map->lock_arg); if (ret) goto err_free; } + + if (map->num_reg_defaults && map->cache_ops->populate) { + dev_dbg(map->dev, "Populating %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); + ret = map->cache_ops->populate(map); + map->unlock(map->lock_arg); + if (ret) + goto err_exit; + } return 0; +err_exit: + if (map->cache_ops->exit) { + dev_dbg(map->dev, "Destroying %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); + ret = map->cache_ops->exit(map); + map->unlock(map->lock_arg); + } err_free: kfree(map->reg_defaults); if (map->cache_free) @@ -217,7 +262,9 @@ void regcache_exit(struct regmap *map) if (map->cache_ops->exit) { dev_dbg(map->dev, "Destroying %s cache\n", map->cache_ops->name); + map->lock(map->lock_arg); map->cache_ops->exit(map); + map->unlock(map->lock_arg); } } @@ -236,7 +283,7 @@ int regcache_read(struct regmap *map, int ret; if (map->cache_type == REGCACHE_NONE) - return -ENOSYS; + return -EINVAL; BUG_ON(!map->cache_ops); @@ -275,11 +322,14 @@ int regcache_write(struct regmap *map, return 0; } -static bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg, - unsigned int val) +bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg, + unsigned int val) { int ret; + if (!regmap_writeable(map, reg)) + return false; + /* If we don't know the chip just got reset, then sync everything. */ if (!map->no_sync_defaults) return true; @@ -305,6 +355,8 @@ static int regcache_default_sync(struct regmap *map, unsigned int min, continue; ret = regcache_read(map, reg, &val); + if (ret == -ENOENT) + continue; if (ret) return ret; @@ -325,6 +377,11 @@ static int regcache_default_sync(struct regmap *map, unsigned int min, return 0; } +static int rbtree_all(const void *key, const struct rb_node *node) +{ + return 0; +} + /** * regcache_sync - Sync the register cache with the hardware. * @@ -342,6 +399,10 @@ int regcache_sync(struct regmap *map) unsigned int i; const char *name; bool bypass; + struct rb_node *node; + + if (WARN_ON(map->cache_type == REGCACHE_NONE)) + return -EINVAL; BUG_ON(!map->cache_ops); @@ -356,8 +417,6 @@ int regcache_sync(struct regmap *map) if (!map->cache_dirty) goto out; - map->async = true; - /* Apply any patch first */ map->cache_bypass = true; for (i = 0; i < map->patch_regs; i++) { @@ -380,9 +439,31 @@ int regcache_sync(struct regmap *map) out: /* Restore the bypass state */ - map->async = false; map->cache_bypass = bypass; map->no_sync_defaults = false; + + /* + * If we did any paging with cache bypassed and a cached + * paging register then the register and cache state might + * have gone out of sync, force writes of all the paging + * registers. 
+ */ + rb_for_each(node, NULL, &map->range_tree, rbtree_all) { + struct regmap_range_node *this = + rb_entry(node, struct regmap_range_node, node); + + /* If there's nothing in the cache there's nothing to sync */ + if (regcache_read(map, this->selector_reg, &i) != 0) + continue; + + ret = _regmap_write(map, this->selector_reg, i); + if (ret != 0) { + dev_err(map->dev, "Failed to write %x = %x: %d\n", + this->selector_reg, i, ret); + break; + } + } + map->unlock(map->lock_arg); regmap_async_complete(map); @@ -412,6 +493,9 @@ int regcache_sync_region(struct regmap *map, unsigned int min, const char *name; bool bypass; + if (WARN_ON(map->cache_type == REGCACHE_NONE)) + return -EINVAL; + BUG_ON(!map->cache_ops); map->lock(map->lock_arg); @@ -495,7 +579,8 @@ EXPORT_SYMBOL_GPL(regcache_drop_region); void regcache_cache_only(struct regmap *map, bool enable) { map->lock(map->lock_arg); - WARN_ON(map->cache_bypass && enable); + WARN_ON(map->cache_type != REGCACHE_NONE && + map->cache_bypass && enable); map->cache_only = enable; trace_regmap_cache_only(map, enable); map->unlock(map->lock_arg); @@ -531,7 +616,7 @@ EXPORT_SYMBOL_GPL(regcache_mark_dirty); * @enable: flag if changes should not be written to the cache * * When a register map is marked with the cache bypass option, writes - * to the register map API will only update the hardware and not the + * to the register map API will only update the hardware and not * the cache directly. This is useful when syncing the cache back to * the hardware. */ @@ -545,17 +630,37 @@ void regcache_cache_bypass(struct regmap *map, bool enable) } EXPORT_SYMBOL_GPL(regcache_cache_bypass); -bool regcache_set_val(struct regmap *map, void *base, unsigned int idx, - unsigned int val) +/** + * regcache_reg_cached - Check if a register is cached + * + * @map: map to check + * @reg: register to check + * + * Reports if a register is cached. + */ +bool regcache_reg_cached(struct regmap *map, unsigned int reg) { - if (regcache_get_val(map, base, idx) == val) - return true; + unsigned int val; + int ret; + + map->lock(map->lock_arg); + ret = regcache_read(map, reg, &val); + + map->unlock(map->lock_arg); + + return ret == 0; +} +EXPORT_SYMBOL_GPL(regcache_reg_cached); + +void regcache_set_val(struct regmap *map, void *base, unsigned int idx, + unsigned int val) +{ /* Use device native format if possible */ if (map->format.format_val) { map->format.format_val(base + (map->cache_word_size * idx), val, 0); - return false; + return; } switch (map->cache_word_size) { @@ -577,18 +682,9 @@ bool regcache_set_val(struct regmap *map, void *base, unsigned int idx, cache[idx] = val; break; } -#ifdef CONFIG_64BIT - case 8: { - u64 *cache = base; - - cache[idx] = val; - break; - } -#endif default: BUG(); } - return false; } unsigned int regcache_get_val(struct regmap *map, const void *base, @@ -618,13 +714,6 @@ unsigned int regcache_get_val(struct regmap *map, const void *base, return cache[idx]; } -#ifdef CONFIG_64BIT - case 8: { - const u64 *cache = base; - - return cache[idx]; - } -#endif default: BUG(); } @@ -665,6 +754,30 @@ static bool regcache_reg_present(unsigned long *cache_present, unsigned int idx) return test_bit(idx, cache_present); } +int regcache_sync_val(struct regmap *map, unsigned int reg, unsigned int val) +{ + int ret; + + if (!regcache_reg_needs_sync(map, reg, val)) + return 0; + + map->cache_bypass = true; + + ret = _regmap_write(map, reg, val); + + map->cache_bypass = false; + + if (ret != 0) { + dev_err(map->dev, "Unable to sync register %#x. 
%d\n", + reg, ret); + return ret; + } + dev_dbg(map->dev, "Synced register %#x, value %#x\n", + reg, val); + + return 0; +} + static int regcache_sync_block_single(struct regmap *map, void *block, unsigned long *cache_present, unsigned int block_base, @@ -681,21 +794,9 @@ static int regcache_sync_block_single(struct regmap *map, void *block, continue; val = regcache_get_val(map, block, i); - if (!regcache_reg_needs_sync(map, regtmp, val)) - continue; - - map->cache_bypass = true; - - ret = _regmap_write(map, regtmp, val); - - map->cache_bypass = false; - if (ret != 0) { - dev_err(map->dev, "Unable to sync register %#x. %d\n", - regtmp, ret); + ret = regcache_sync_val(map, regtmp, val); + if (ret != 0) return ret; - } - dev_dbg(map->dev, "Synced register %#x, value %#x\n", - regtmp, val); } return 0; diff --git a/drivers/base/regmap/regmap-ac97.c b/drivers/base/regmap/regmap-ac97.c index b9f76bdf74a9..a561971c459c 100644 --- a/drivers/base/regmap/regmap-ac97.c +++ b/drivers/base/regmap/regmap-ac97.c @@ -86,4 +86,5 @@ struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, } EXPORT_SYMBOL_GPL(__devm_regmap_init_ac97); +MODULE_DESCRIPTION("Register map access API - AC'97 support"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 211a335a608d..c9b4c04b1cf6 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -48,7 +48,7 @@ static ssize_t regmap_name_read_file(struct file *file, name = map->dev->driver->name; ret = snprintf(buf, PAGE_SIZE, "%s\n", name); - if (ret < 0) { + if (ret >= PAGE_SIZE) { kfree(buf); return ret; } @@ -226,8 +226,8 @@ static ssize_t regmap_read_debugfs(struct regmap *map, unsigned int from, if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << (MAX_ORDER - 1))) - count = PAGE_SIZE << (MAX_ORDER - 1); + if (count > (PAGE_SIZE << MAX_PAGE_ORDER)) + count = PAGE_SIZE << MAX_PAGE_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) @@ -368,13 +368,13 @@ static ssize_t regmap_reg_ranges_read_file(struct file *file, char *buf; char *entry; int ret; - unsigned entry_len; + unsigned int entry_len; if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << (MAX_ORDER - 1))) - count = PAGE_SIZE << (MAX_ORDER - 1); + if (count > (PAGE_SIZE << MAX_PAGE_ORDER)) + count = PAGE_SIZE << MAX_PAGE_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) @@ -470,10 +470,6 @@ static ssize_t regmap_cache_only_write_file(struct file *file, if (err) return count; - err = debugfs_file_get(file->f_path.dentry); - if (err) - return err; - map->lock(map->lock_arg); if (new_val && !map->cache_only) { @@ -486,7 +482,6 @@ static ssize_t regmap_cache_only_write_file(struct file *file, map->cache_only = new_val; map->unlock(map->lock_arg); - debugfs_file_put(file->f_path.dentry); if (require_sync) { err = regcache_sync(map); @@ -517,10 +512,6 @@ static ssize_t regmap_cache_bypass_write_file(struct file *file, if (err) return count; - err = debugfs_file_get(file->f_path.dentry); - if (err) - return err; - map->lock(map->lock_arg); if (new_val && !map->cache_bypass) { @@ -532,7 +523,6 @@ static ssize_t regmap_cache_bypass_write_file(struct file *file, map->cache_bypass = new_val; map->unlock(map->lock_arg); - debugfs_file_put(file->f_path.dentry); return count; } @@ -598,7 +588,7 @@ void regmap_debugfs_init(struct regmap *map) map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); if (!map->debugfs_name) - return; + return; name = map->debugfs_name; 
dummy_index++; } @@ -636,6 +626,17 @@ void regmap_debugfs_init(struct regmap *map) ®map_cache_bypass_fops); } + /* + * This could interfere with driver operation. Therefore, don't provide + * any real compile time configuration option for this feature. One will + * have to modify the source code directly in order to use it. + */ +#undef REGMAP_ALLOW_FORCE_WRITE_FIELD_DEBUGFS +#ifdef REGMAP_ALLOW_FORCE_WRITE_FIELD_DEBUGFS + debugfs_create_bool("force_write_field", 0600, map->debugfs, + &map->force_write_field); +#endif + next = rb_first(&map->range_tree); while (next) { range_node = rb_entry(next, struct regmap_range_node, node); diff --git a/drivers/base/regmap/regmap-fsi.c b/drivers/base/regmap/regmap-fsi.c new file mode 100644 index 000000000000..3d2f3cb31d5e --- /dev/null +++ b/drivers/base/regmap/regmap-fsi.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Register map access API - FSI support +// +// Copyright 2022 IBM Corp +// +// Author: Eddie James <eajames@linux.ibm.com> + +#include <linux/fsi.h> +#include <linux/module.h> +#include <linux/regmap.h> + +#include "internal.h" + +static int regmap_fsi32_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + u32 v; + int ret; + + ret = fsi_slave_read(context, reg, &v, sizeof(v)); + if (ret) + return ret; + + *val = v; + return 0; +} + +static int regmap_fsi32_reg_write(void *context, unsigned int reg, unsigned int val) +{ + u32 v = val; + + return fsi_slave_write(context, reg, &v, sizeof(v)); +} + +static const struct regmap_bus regmap_fsi32 = { + .reg_write = regmap_fsi32_reg_write, + .reg_read = regmap_fsi32_reg_read, +}; + +static int regmap_fsi32le_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + __be32 v; + int ret; + + ret = fsi_slave_read(context, reg, &v, sizeof(v)); + if (ret) + return ret; + + *val = be32_to_cpu(v); + return 0; +} + +static int regmap_fsi32le_reg_write(void *context, unsigned int reg, unsigned int val) +{ + __be32 v = cpu_to_be32(val); + + return fsi_slave_write(context, reg, &v, sizeof(v)); +} + +static const struct regmap_bus regmap_fsi32le = { + .reg_write = regmap_fsi32le_reg_write, + .reg_read = regmap_fsi32le_reg_read, +}; + +static int regmap_fsi16_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + u16 v; + int ret; + + ret = fsi_slave_read(context, reg, &v, sizeof(v)); + if (ret) + return ret; + + *val = v; + return 0; +} + +static int regmap_fsi16_reg_write(void *context, unsigned int reg, unsigned int val) +{ + u16 v; + + if (val > 0xffff) + return -EINVAL; + + v = val; + return fsi_slave_write(context, reg, &v, sizeof(v)); +} + +static const struct regmap_bus regmap_fsi16 = { + .reg_write = regmap_fsi16_reg_write, + .reg_read = regmap_fsi16_reg_read, +}; + +static int regmap_fsi16le_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + __be16 v; + int ret; + + ret = fsi_slave_read(context, reg, &v, sizeof(v)); + if (ret) + return ret; + + *val = be16_to_cpu(v); + return 0; +} + +static int regmap_fsi16le_reg_write(void *context, unsigned int reg, unsigned int val) +{ + __be16 v; + + if (val > 0xffff) + return -EINVAL; + + v = cpu_to_be16(val); + return fsi_slave_write(context, reg, &v, sizeof(v)); +} + +static const struct regmap_bus regmap_fsi16le = { + .reg_write = regmap_fsi16le_reg_write, + .reg_read = regmap_fsi16le_reg_read, +}; + +static int regmap_fsi8_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + u8 v; + int ret; + + ret = fsi_slave_read(context, reg, &v, sizeof(v)); + if (ret) + return ret; + + *val 
= v; + return 0; +} + +static int regmap_fsi8_reg_write(void *context, unsigned int reg, unsigned int val) +{ + u8 v; + + if (val > 0xff) + return -EINVAL; + + v = val; + return fsi_slave_write(context, reg, &v, sizeof(v)); +} + +static const struct regmap_bus regmap_fsi8 = { + .reg_write = regmap_fsi8_reg_write, + .reg_read = regmap_fsi8_reg_read, +}; + +static const struct regmap_bus *regmap_get_fsi_bus(struct fsi_device *fsi_dev, + const struct regmap_config *config) +{ + const struct regmap_bus *bus = NULL; + + if (config->reg_bits == 8 || config->reg_bits == 16 || config->reg_bits == 32) { + switch (config->val_bits) { + case 8: + bus = ®map_fsi8; + break; + case 16: + switch (regmap_get_val_endian(&fsi_dev->dev, NULL, config)) { + case REGMAP_ENDIAN_LITTLE: +#ifdef __LITTLE_ENDIAN + case REGMAP_ENDIAN_NATIVE: +#endif + bus = ®map_fsi16le; + break; + case REGMAP_ENDIAN_DEFAULT: + case REGMAP_ENDIAN_BIG: +#ifdef __BIG_ENDIAN + case REGMAP_ENDIAN_NATIVE: +#endif + bus = ®map_fsi16; + break; + default: + break; + } + break; + case 32: + switch (regmap_get_val_endian(&fsi_dev->dev, NULL, config)) { + case REGMAP_ENDIAN_LITTLE: +#ifdef __LITTLE_ENDIAN + case REGMAP_ENDIAN_NATIVE: +#endif + bus = ®map_fsi32le; + break; + case REGMAP_ENDIAN_DEFAULT: + case REGMAP_ENDIAN_BIG: +#ifdef __BIG_ENDIAN + case REGMAP_ENDIAN_NATIVE: +#endif + bus = ®map_fsi32; + break; + default: + break; + } + break; + } + } + + return bus ?: ERR_PTR(-EOPNOTSUPP); +} + +struct regmap *__regmap_init_fsi(struct fsi_device *fsi_dev, const struct regmap_config *config, + struct lock_class_key *lock_key, const char *lock_name) +{ + const struct regmap_bus *bus = regmap_get_fsi_bus(fsi_dev, config); + + if (IS_ERR(bus)) + return ERR_CAST(bus); + + return __regmap_init(&fsi_dev->dev, bus, fsi_dev->slave, config, lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__regmap_init_fsi); + +struct regmap *__devm_regmap_init_fsi(struct fsi_device *fsi_dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, const char *lock_name) +{ + const struct regmap_bus *bus = regmap_get_fsi_bus(fsi_dev, config); + + if (IS_ERR(bus)) + return ERR_CAST(bus); + + return __devm_regmap_init(&fsi_dev->dev, bus, fsi_dev->slave, config, lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__devm_regmap_init_fsi); + +MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 980e5ce6a3a3..c9b39a02278e 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -242,8 +242,8 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg, static const struct regmap_bus regmap_i2c_smbus_i2c_block = { .write = regmap_i2c_smbus_i2c_write, .read = regmap_i2c_smbus_i2c_read, - .max_raw_read = I2C_SMBUS_BLOCK_MAX, - .max_raw_write = I2C_SMBUS_BLOCK_MAX, + .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1, + .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1, }; static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data, @@ -299,8 +299,8 @@ static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg, static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = { .write = regmap_i2c_smbus_i2c_write_reg16, .read = regmap_i2c_smbus_i2c_read_reg16, - .max_raw_read = I2C_SMBUS_BLOCK_MAX, - .max_raw_write = I2C_SMBUS_BLOCK_MAX, + .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2, + .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2, }; static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, @@ -350,7 +350,8 @@ static const struct regmap_bus 
*regmap_get_i2c_bus(struct i2c_client *i2c, if (quirks->max_write_len && (bus->max_raw_write == 0 || bus->max_raw_write > quirks->max_write_len)) - max_write = quirks->max_write_len; + max_write = quirks->max_write_len - + (config->reg_bits + config->pad_bits) / BITS_PER_BYTE; if (max_read || max_write) { ret_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); @@ -396,4 +397,5 @@ struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c, } EXPORT_SYMBOL_GPL(__devm_regmap_init_i2c); +MODULE_DESCRIPTION("Register map access API - I2C support"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-i3c.c b/drivers/base/regmap/regmap-i3c.c index 1578fb506683..863b348704dc 100644 --- a/drivers/base/regmap/regmap-i3c.c +++ b/drivers/base/regmap/regmap-i3c.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. +#include <linux/array_size.h> #include <linux/regmap.h> #include <linux/i3c/device.h> #include <linux/i3c/master.h> @@ -10,7 +11,7 @@ static int regmap_i3c_write(void *context, const void *data, size_t count) { struct device *dev = context; struct i3c_device *i3c = dev_to_i3cdev(dev); - struct i3c_priv_xfer xfers[] = { + struct i3c_xfer xfers[] = { { .rnw = false, .len = count, @@ -18,7 +19,7 @@ static int regmap_i3c_write(void *context, const void *data, size_t count) }, }; - return i3c_device_do_priv_xfers(i3c, xfers, 1); + return i3c_device_do_xfers(i3c, xfers, ARRAY_SIZE(xfers), I3C_SDR); } static int regmap_i3c_read(void *context, @@ -27,7 +28,7 @@ static int regmap_i3c_read(void *context, { struct device *dev = context; struct i3c_device *i3c = dev_to_i3cdev(dev); - struct i3c_priv_xfer xfers[2]; + struct i3c_xfer xfers[2]; xfers[0].rnw = false; xfers[0].len = reg_size; @@ -37,10 +38,10 @@ static int regmap_i3c_read(void *context, xfers[1].len = val_size; xfers[1].data.in = val; - return i3c_device_do_priv_xfers(i3c, xfers, 2); + return i3c_device_do_xfers(i3c, xfers, ARRAY_SIZE(xfers), I3C_SDR); } -static struct regmap_bus regmap_i3c = { +static const struct regmap_bus regmap_i3c = { .write = regmap_i3c_write, .read = regmap_i3c_read, }; @@ -56,5 +57,5 @@ struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c, EXPORT_SYMBOL_GPL(__devm_regmap_init_i3c); MODULE_AUTHOR("Vitor Soares <vitor.soares@synopsys.com>"); -MODULE_DESCRIPTION("Regmap I3C Module"); +MODULE_DESCRIPTION("regmap I3C Module"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d2656581a608..6112d942499b 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -6,11 +6,13 @@ // // Author: Mark Brown <broonie@opensource.wolfsonmicro.com> +#include <linux/array_size.h> #include <linux/device.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/overflow.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -19,6 +21,7 @@ struct regmap_irq_chip_data { struct mutex lock; + struct lock_class_key lock_key; struct irq_chip irq_chip; struct regmap *map; @@ -33,39 +36,21 @@ struct regmap_irq_chip_data { void *status_reg_buf; unsigned int *main_status_buf; unsigned int *status_buf; + unsigned int *prev_status_buf; unsigned int *mask_buf; unsigned int *mask_buf_def; unsigned int *wake_buf; unsigned int *type_buf; unsigned int *type_buf_def; - unsigned int **virt_buf; + unsigned int **config_buf; unsigned int irq_reg_stride; - unsigned int type_reg_stride; - bool 
clear_status:1; -}; - -static int sub_irq_reg(struct regmap_irq_chip_data *data, - unsigned int base_reg, int i) -{ - const struct regmap_irq_chip *chip = data->chip; - struct regmap *map = data->map; - struct regmap_irq_sub_irq_map *subreg; - unsigned int offset; - int reg = 0; - - if (!chip->sub_reg_offsets || !chip->not_fixed_stride) { - /* Assume linear mapping */ - reg = base_reg + (i * map->reg_stride * data->irq_reg_stride); - } else { - subreg = &chip->sub_reg_offsets[i]; - offset = subreg->offset[0]; - reg = base_reg + offset; - } + unsigned int (*get_irq_reg)(struct regmap_irq_chip_data *data, + unsigned int base, int index); - return reg; -} + unsigned int clear_status:1; +}; static inline const struct regmap_irq *irq_to_regmap_irq(struct regmap_irq_chip_data *data, @@ -74,21 +59,25 @@ struct regmap_irq *irq_to_regmap_irq(struct regmap_irq_chip_data *data, return &data->chip->irqs[irq]; } -static void regmap_irq_lock(struct irq_data *data) +static bool regmap_irq_can_bulk_read_status(struct regmap_irq_chip_data *data) { - struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = data->map; - mutex_lock(&d->lock); + /* + * While possible that a user-defined ->get_irq_reg() callback might + * be linear enough to support bulk reads, most of the time it won't. + * Therefore only allow them if the default callback is being used. + */ + return data->irq_reg_stride == 1 && map->reg_stride == 1 && + data->get_irq_reg == regmap_irq_get_irq_reg_linear && + !map->use_single_read; } -static int regmap_irq_update_bits(struct regmap_irq_chip_data *d, - unsigned int reg, unsigned int mask, - unsigned int val) +static void regmap_irq_lock(struct irq_data *data) { - if (d->chip->mask_writeonly) - return regmap_write_bits(d->map, reg, mask, val); - else - return regmap_update_bits(d->map, reg, mask, val); + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + + mutex_lock(&d->lock); } static void regmap_irq_sync_unlock(struct irq_data *data) @@ -97,7 +86,6 @@ static void regmap_irq_sync_unlock(struct irq_data *data) struct regmap *map = d->map; int i, j, ret; u32 reg; - u32 unmask_offset; u32 val; if (d->chip->runtime_pm) { @@ -109,7 +97,7 @@ static void regmap_irq_sync_unlock(struct irq_data *data) if (d->clear_status) { for (i = 0; i < d->chip->num_regs; i++) { - reg = sub_irq_reg(d, d->chip->status_base, i); + reg = d->get_irq_reg(d, d->chip->status_base, i); ret = regmap_read(map, reg, &val); if (ret) @@ -126,44 +114,37 @@ static void regmap_irq_sync_unlock(struct irq_data *data) * suppress pointless writes. 
*/ for (i = 0; i < d->chip->num_regs; i++) { - if (!d->chip->mask_base) - continue; + if (d->chip->handle_mask_sync) + d->chip->handle_mask_sync(i, d->mask_buf_def[i], + d->mask_buf[i], + d->chip->irq_drv_data); + + if (d->chip->mask_base && !d->chip->handle_mask_sync) { + reg = d->get_irq_reg(d, d->chip->mask_base, i); + ret = regmap_update_bits(d->map, reg, + d->mask_buf_def[i], + d->mask_buf[i]); + if (ret) + dev_err(d->map->dev, "Failed to sync masks in %x\n", reg); + } - reg = sub_irq_reg(d, d->chip->mask_base, i); - if (d->chip->mask_invert) { - ret = regmap_irq_update_bits(d, reg, - d->mask_buf_def[i], ~d->mask_buf[i]); - } else if (d->chip->unmask_base) { - /* set mask with mask_base register */ - ret = regmap_irq_update_bits(d, reg, + if (d->chip->unmask_base && !d->chip->handle_mask_sync) { + reg = d->get_irq_reg(d, d->chip->unmask_base, i); + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], ~d->mask_buf[i]); - if (ret < 0) - dev_err(d->map->dev, - "Failed to sync unmasks in %x\n", + if (ret) + dev_err(d->map->dev, "Failed to sync masks in %x\n", reg); - unmask_offset = d->chip->unmask_base - - d->chip->mask_base; - /* clear mask with unmask_base register */ - ret = regmap_irq_update_bits(d, - reg + unmask_offset, - d->mask_buf_def[i], - d->mask_buf[i]); - } else { - ret = regmap_irq_update_bits(d, reg, - d->mask_buf_def[i], d->mask_buf[i]); } - if (ret != 0) - dev_err(d->map->dev, "Failed to sync masks in %x\n", - reg); - reg = sub_irq_reg(d, d->chip->wake_base, i); + reg = d->get_irq_reg(d, d->chip->wake_base, i); if (d->wake_buf) { if (d->chip->wake_invert) - ret = regmap_irq_update_bits(d, reg, + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], ~d->wake_buf[i]); else - ret = regmap_irq_update_bits(d, reg, + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], d->wake_buf[i]); if (ret != 0) @@ -180,7 +161,7 @@ static void regmap_irq_sync_unlock(struct irq_data *data) * it'll be ignored in irq handler, then may introduce irq storm */ if (d->mask_buf[i] && (d->chip->ack_base || d->chip->use_ack)) { - reg = sub_irq_reg(d, d->chip->ack_base, i); + reg = d->get_irq_reg(d, d->chip->ack_base, i); /* some chips ack by write 0 */ if (d->chip->ack_invert) @@ -189,11 +170,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data) ret = regmap_write(map, reg, d->mask_buf[i]); if (d->chip->clear_ack) { if (d->chip->ack_invert && !ret) - ret = regmap_write(map, reg, - d->mask_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~d->mask_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(d->map->dev, "Failed to ack 0x%x: %d\n", @@ -201,35 +180,14 @@ static void regmap_irq_sync_unlock(struct irq_data *data) } } - /* Don't update the type bits if we're using mask bits for irq type. 
*/ - if (!d->chip->type_in_mask) { - for (i = 0; i < d->chip->num_type_reg; i++) { - if (!d->type_buf_def[i]) - continue; - reg = sub_irq_reg(d, d->chip->type_base, i); - if (d->chip->type_invert) - ret = regmap_irq_update_bits(d, reg, - d->type_buf_def[i], ~d->type_buf[i]); - else - ret = regmap_irq_update_bits(d, reg, - d->type_buf_def[i], d->type_buf[i]); - if (ret != 0) - dev_err(d->map->dev, "Failed to sync type in %x\n", - reg); - } - } - - if (d->chip->num_virt_regs) { - for (i = 0; i < d->chip->num_virt_regs; i++) { - for (j = 0; j < d->chip->num_regs; j++) { - reg = sub_irq_reg(d, d->chip->virt_reg_base[i], - j); - ret = regmap_write(map, reg, d->virt_buf[i][j]); - if (ret != 0) - dev_err(d->map->dev, - "Failed to write virt 0x%x: %d\n", - reg, ret); - } + for (i = 0; i < d->chip->num_config_bases; i++) { + for (j = 0; j < d->chip->num_config_regs; j++) { + reg = d->get_irq_reg(d, d->chip->config_base[i], j); + ret = regmap_write(map, reg, d->config_buf[i][j]); + if (ret) + dev_err(d->map->dev, + "Failed to write config %x: %d\n", + reg, ret); } } @@ -239,10 +197,10 @@ static void regmap_irq_sync_unlock(struct irq_data *data) /* If we've changed our wakeup count propagate it to the parent */ if (d->wake_count < 0) for (i = d->wake_count; i < 0; i++) - irq_set_irq_wake(d->irq, 0); + disable_irq_wake(d->irq); else if (d->wake_count > 0) for (i = 0; i < d->wake_count; i++) - irq_set_irq_wake(d->irq, 1); + enable_irq_wake(d->irq); d->wake_count = 0; @@ -254,30 +212,28 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); - unsigned int mask, type; - - type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; + unsigned int reg = irq_data->reg_offset / map->reg_stride; + unsigned int mask; /* * The type_in_mask flag means that the underlying hardware uses - * separate mask bits for rising and falling edge interrupts, but - * we want to make them into a single virtual interrupt with - * configurable edge. + * separate mask bits for each interrupt trigger type, but we want + * to have a single logical interrupt with a configurable type. * - * If the interrupt we're enabling defines the falling or rising - * masks then instead of using the regular mask bits for this - * interrupt, use the value previously written to the type buffer - * at the corresponding offset in regmap_irq_set_type(). + * If the interrupt we're enabling defines any supported types + * then instead of using the regular mask bits for this interrupt, + * use the value previously written to the type buffer at the + * corresponding offset in regmap_irq_set_type(). 
*/ - if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + if (d->chip->type_in_mask && irq_data->type.types_supported) + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) @@ -294,7 +250,7 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); - int reg; + int reg, ret; const struct regmap_irq_type *t = &irq_data->type; if ((t->types_supported & type) != type) @@ -302,41 +258,19 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type) reg = t->type_reg_offset / map->reg_stride; - if (t->type_reg_mask) - d->type_buf[reg] &= ~t->type_reg_mask; - else - d->type_buf[reg] &= ~(t->type_falling_val | - t->type_rising_val | - t->type_level_low_val | - t->type_level_high_val); - switch (type) { - case IRQ_TYPE_EDGE_FALLING: - d->type_buf[reg] |= t->type_falling_val; - break; - - case IRQ_TYPE_EDGE_RISING: - d->type_buf[reg] |= t->type_rising_val; - break; - - case IRQ_TYPE_EDGE_BOTH: - d->type_buf[reg] |= (t->type_falling_val | - t->type_rising_val); - break; - - case IRQ_TYPE_LEVEL_HIGH: - d->type_buf[reg] |= t->type_level_high_val; - break; - - case IRQ_TYPE_LEVEL_LOW: - d->type_buf[reg] |= t->type_level_low_val; - break; - default: - return -EINVAL; + if (d->chip->type_in_mask) { + ret = regmap_irq_set_type_config_simple(&d->type_buf, type, + irq_data, reg, d->chip->irq_drv_data); + if (ret) + return ret; } - if (d->chip->set_type_virt) - return d->chip->set_type_virt(d->virt_buf, type, data->hwirq, - reg); + if (d->chip->set_type_config) { + ret = d->chip->set_type_config(d->config_buf, type, irq_data, + reg, d->chip->irq_drv_data); + if (ret) + return ret; + } return 0; } @@ -375,29 +309,26 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, unsigned int b) { const struct regmap_irq_chip *chip = data->chip; + const struct regmap_irq_sub_irq_map *subreg; struct regmap *map = data->map; - struct regmap_irq_sub_irq_map *subreg; + unsigned int reg; int i, ret = 0; if (!chip->sub_reg_offsets) { - /* Assume linear mapping */ - ret = regmap_read(map, chip->status_base + - (b * map->reg_stride * data->irq_reg_stride), - &data->status_buf[b]); + reg = data->get_irq_reg(data, chip->status_base, b); + ret = regmap_read(map, reg, &data->status_buf[b]); } else { + /* + * Note we can't use ->get_irq_reg() here because the offsets + * in 'subreg' are *not* interchangeable with indices. 
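 *
 * (Illustrative example, numbers are hypothetical: with a reg_stride of 4,
 * a sub-IRQ offset of 0x40 is read into status_buf[0x10]; this is why the
 * offset is divided by the stride below rather than used as an index
 * directly.)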
+ */ subreg = &chip->sub_reg_offsets[b]; for (i = 0; i < subreg->num_regs; i++) { unsigned int offset = subreg->offset[i]; + unsigned int index = offset / map->reg_stride; - if (chip->not_fixed_stride) - ret = regmap_read(map, - chip->status_base + offset, - &data->status_buf[b]); - else - ret = regmap_read(map, - chip->status_base + offset, - &data->status_buf[offset]); - + ret = regmap_read(map, chip->status_base + offset, + &data->status_buf[index]); if (ret) break; } @@ -405,43 +336,29 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data, return ret; } -static irqreturn_t regmap_irq_thread(int irq, void *d) +static int read_irq_data(struct regmap_irq_chip_data *data) { - struct regmap_irq_chip_data *data = d; const struct regmap_irq_chip *chip = data->chip; struct regmap *map = data->map; int ret, i; - bool handled = false; u32 reg; - if (chip->handle_pre_irq) - chip->handle_pre_irq(chip->irq_drv_data); - - if (chip->runtime_pm) { - ret = pm_runtime_get_sync(map->dev); - if (ret < 0) { - dev_err(map->dev, "IRQ thread failed to resume: %d\n", - ret); - goto exit; - } - } - /* * Read only registers with active IRQs if the chip has 'main status * register'. Else read in the statuses, using a single bulk read if * possible in order to reduce the I/O overheads. */ - if (chip->num_main_regs) { + if (chip->no_status) { + /* no status register so default to all active */ + memset32(data->status_buf, GENMASK(31, 0), chip->num_regs); + } else if (chip->num_main_regs) { unsigned int max_main_bits; - unsigned long size; - - size = chip->num_regs * sizeof(unsigned int); max_main_bits = (chip->num_main_status_bits) ? chip->num_main_status_bits : chip->num_regs; /* Clear the status buf as we don't read all status regs */ - memset(data->status_buf, 0, size); + memset32(data->status_buf, 0, chip->num_regs); /* We could support bulk read for main status registers * but I don't expect to see devices with really many main @@ -449,15 +366,11 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) * sake of simplicity. 
and add bulk reads only if needed */ for (i = 0; i < chip->num_main_regs; i++) { - ret = regmap_read(map, chip->main_status + - (i * map->reg_stride - * data->irq_reg_stride), - &data->main_status_buf[i]); + reg = data->get_irq_reg(data, chip->main_status, i); + ret = regmap_read(map, reg, &data->main_status_buf[i]); if (ret) { - dev_err(map->dev, - "Failed to read IRQ status %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status %d\n", ret); + return ret; } } @@ -473,16 +386,13 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) ret = read_sub_irq_data(data, b); if (ret != 0) { - dev_err(map->dev, - "Failed to read IRQ status %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status %d\n", ret); + return ret; } } } - } else if (!map->use_single_read && map->reg_stride == 1 && - data->irq_reg_stride == 1) { + } else if (regmap_irq_can_bulk_read_status(data)) { u8 *buf8 = data->status_reg_buf; u16 *buf16 = data->status_reg_buf; @@ -494,9 +404,8 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_reg_buf, chip->num_regs); if (ret != 0) { - dev_err(map->dev, "Failed to read IRQ status: %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); + return ret; } for (i = 0; i < data->chip->num_regs; i++) { @@ -512,21 +421,19 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) break; default: BUG(); - goto exit; + return -EIO; } } } else { for (i = 0; i < data->chip->num_regs; i++) { - unsigned int reg = sub_irq_reg(data, + unsigned int reg = data->get_irq_reg(data, data->chip->status_base, i); ret = regmap_read(map, reg, &data->status_buf[i]); if (ret != 0) { - dev_err(map->dev, - "Failed to read IRQ status: %d\n", - ret); - goto exit; + dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); + return ret; } } } @@ -535,9 +442,45 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) for (i = 0; i < data->chip->num_regs; i++) data->status_buf[i] = ~data->status_buf[i]; + return 0; +} + +static irqreturn_t regmap_irq_thread(int irq, void *d) +{ + struct regmap_irq_chip_data *data = d; + const struct regmap_irq_chip *chip = data->chip; + struct regmap *map = data->map; + int ret, i; + bool handled = false; + u32 reg; + + if (chip->handle_pre_irq) + chip->handle_pre_irq(chip->irq_drv_data); + + if (chip->runtime_pm) { + ret = pm_runtime_get_sync(map->dev); + if (ret < 0) { + dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret); + goto exit; + } + } + + ret = read_irq_data(data); + if (ret < 0) + goto exit; + + if (chip->status_is_level) { + for (i = 0; i < data->chip->num_regs; i++) { + unsigned int val = data->status_buf[i]; + + data->status_buf[i] ^= data->prev_status_buf[i]; + data->prev_status_buf[i] = val; + } + } + /* * Ignore masked IRQs and ack if we need to; we ack early so - * there is no race between handling and acknowleding the + * there is no race between handling and acknowledging the * interrupt. We assume that typically few of the interrupts * will fire simultaneously so don't worry about overhead from * doing a write per register. 
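The ->get_irq_reg() lookups above replace the old fixed-stride address arithmetic (and the removed not_fixed_stride special case): the core falls back to a linear formula, while a chip whose IRQ register banks are laid out irregularly can supply its own callback through the new get_irq_reg member of struct regmap_irq_chip. A minimal sketch of a client on top of this rework, assuming a hypothetical device with two discontiguous IRQ banks (all names and register addresses below are invented for illustration and are not part of the patch):

#include <linux/bits.h>
#include <linux/regmap.h>

/* Bank 1 lives in a separate register block 0x100 above bank 0. */
static unsigned int foo_get_irq_reg(struct regmap_irq_chip_data *data,
				    unsigned int base, int index)
{
	return index ? base + 0x100 : base;
}

static const struct regmap_irq foo_irqs[] = {
	REGMAP_IRQ_REG(0, 0, BIT(0)),	/* hwirq 0: bank 0, bit 0 */
	REGMAP_IRQ_REG(1, 1, BIT(3)),	/* hwirq 1: bank 1, bit 3 */
};

static const struct regmap_irq_chip foo_irq_chip = {
	.name		= "foo",
	.status_base	= 0x40,
	.mask_base	= 0x44,
	.ack_base	= 0x48,
	.num_regs	= 2,
	.irqs		= foo_irqs,
	.num_irqs	= ARRAY_SIZE(foo_irqs),
	.get_irq_reg	= foo_get_irq_reg,
};

Registration itself is unchanged, e.g. regmap_add_irq_chip(map, irq, IRQF_ONESHOT, 0, &foo_irq_chip, &irq_data); chips with evenly spaced registers simply omit .get_irq_reg and get the linear default, regmap_irq_get_irq_reg_linear(), documented just below.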
@@ -546,7 +489,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_buf[i] &= ~data->mask_buf[i]; if (data->status_buf[i] && (chip->ack_base || chip->use_ack)) { - reg = sub_irq_reg(data, data->chip->ack_base, i); + reg = data->get_irq_reg(data, data->chip->ack_base, i); if (chip->ack_invert) ret = regmap_write(map, reg, @@ -556,11 +499,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - data->status_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~data->status_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(map->dev, "Failed to ack 0x%x: %d\n", @@ -577,24 +518,28 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) } exit: - if (chip->runtime_pm) - pm_runtime_put(map->dev); - if (chip->handle_post_irq) chip->handle_post_irq(chip->irq_drv_data); + if (chip->runtime_pm) + pm_runtime_put(map->dev); + if (handled) return IRQ_HANDLED; else return IRQ_NONE; } +static struct lock_class_key regmap_irq_lock_class; +static struct lock_class_key regmap_irq_request_class; + static int regmap_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { struct regmap_irq_chip_data *data = h->host_data; irq_set_chip_data(virq, data); + irq_set_lockdep_class(virq, &regmap_irq_lock_class, &regmap_irq_request_class); irq_set_chip(virq, &data->irq_chip); irq_set_nested_thread(virq, 1); irq_set_parent(virq, data->irq); @@ -609,6 +554,105 @@ static const struct irq_domain_ops regmap_domain_ops = { }; /** + * regmap_irq_get_irq_reg_linear() - Linear IRQ register mapping callback. + * @data: Data for the &struct regmap_irq_chip + * @base: Base register + * @index: Register index + * + * Returns the register address corresponding to the given @base and @index + * by the formula ``base + index * regmap_stride * irq_reg_stride``. + */ +unsigned int regmap_irq_get_irq_reg_linear(struct regmap_irq_chip_data *data, + unsigned int base, int index) +{ + struct regmap *map = data->map; + + return base + index * map->reg_stride * data->irq_reg_stride; +} +EXPORT_SYMBOL_GPL(regmap_irq_get_irq_reg_linear); + +/** + * regmap_irq_set_type_config_simple() - Simple IRQ type configuration callback. + * @buf: Buffer containing configuration register values, this is a 2D array of + * `num_config_bases` rows, each of `num_config_regs` elements. + * @type: The requested IRQ type. + * @irq_data: The IRQ being configured. + * @idx: Index of the irq's config registers within each array `buf[i]` + * @irq_drv_data: Driver specific IRQ data + * + * This is a &struct regmap_irq_chip->set_type_config callback suitable for + * chips with one config register. Register values are updated according to + * the &struct regmap_irq_type data associated with an IRQ. 
+ */ +int regmap_irq_set_type_config_simple(unsigned int **buf, unsigned int type, + const struct regmap_irq *irq_data, + int idx, void *irq_drv_data) +{ + const struct regmap_irq_type *t = &irq_data->type; + + if (t->type_reg_mask) + buf[0][idx] &= ~t->type_reg_mask; + else + buf[0][idx] &= ~(t->type_falling_val | + t->type_rising_val | + t->type_level_low_val | + t->type_level_high_val); + + switch (type) { + case IRQ_TYPE_EDGE_FALLING: + buf[0][idx] |= t->type_falling_val; + break; + + case IRQ_TYPE_EDGE_RISING: + buf[0][idx] |= t->type_rising_val; + break; + + case IRQ_TYPE_EDGE_BOTH: + buf[0][idx] |= (t->type_falling_val | + t->type_rising_val); + break; + + case IRQ_TYPE_LEVEL_HIGH: + buf[0][idx] |= t->type_level_high_val; + break; + + case IRQ_TYPE_LEVEL_LOW: + buf[0][idx] |= t->type_level_low_val; + break; + + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(regmap_irq_set_type_config_simple); + +static int regmap_irq_create_domain(struct fwnode_handle *fwnode, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data *d) +{ + struct irq_domain_info info = { + .fwnode = fwnode, + .size = chip->num_irqs, + .hwirq_max = chip->num_irqs, + .virq_base = irq_base, + .ops = &regmap_domain_ops, + .host_data = d, + .name_suffix = chip->domain_suffix, + }; + + d->domain = irq_domain_instantiate(&info); + if (IS_ERR(d->domain)) { + dev_err(d->map->dev, "Failed to create IRQ domain\n"); + return PTR_ERR(d->domain); + } + + return 0; +} + + +/** + * regmap_add_irq_chip_fwnode() - Use standard regmap IRQ controller handling + * + * @fwnode: The firmware node where the IRQ domain should be added to. @@ -634,9 +678,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, struct regmap_irq_chip_data *d; int i; int ret = -ENOMEM; - int num_type_reg; u32 reg; - u32 unmask_offset; if (chip->num_regs <= 0) return -EINVAL; @@ -644,6 +686,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (chip->clear_on_unmask && (chip->ack_base || chip->use_ack)) return -EINVAL; + if (chip->mask_base && chip->unmask_base && !chip->mask_unmask_non_inverted) + return -EINVAL; + for (i = 0; i < chip->num_irqs; i++) { if (chip->irqs[i].reg_offset % map->reg_stride) return -EINVAL; @@ -652,12 +697,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, return -EINVAL; } - if (chip->not_fixed_stride) { - for (i = 0; i < chip->num_regs; i++) - if (chip->sub_reg_offsets[i].num_regs != 1) - return -EINVAL; - } - if (irq_base) { irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0); if (irq_base < 0) { @@ -673,62 +712,67 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (chip->num_main_regs) { d->main_status_buf = kcalloc(chip->num_main_regs, - sizeof(unsigned int), + sizeof(*d->main_status_buf), GFP_KERNEL); if (!d->main_status_buf) goto err_alloc; } - d->status_buf = kcalloc(chip->num_regs, sizeof(unsigned int), + d->status_buf = kcalloc(chip->num_regs, sizeof(*d->status_buf), GFP_KERNEL); if (!d->status_buf) goto err_alloc; - d->mask_buf = kcalloc(chip->num_regs, sizeof(unsigned int), + if (chip->status_is_level) { + d->prev_status_buf = kcalloc(chip->num_regs, sizeof(*d->prev_status_buf), + GFP_KERNEL); + if (!d->prev_status_buf) + goto err_alloc; + } + + d->mask_buf = kcalloc(chip->num_regs, sizeof(*d->mask_buf), GFP_KERNEL); if (!d->mask_buf) goto err_alloc; - d->mask_buf_def = kcalloc(chip->num_regs, sizeof(unsigned int), + d->mask_buf_def = kcalloc(chip->num_regs, sizeof(*d->mask_buf_def), GFP_KERNEL); if (!d->mask_buf_def) goto 
err_alloc; if (chip->wake_base) { - d->wake_buf = kcalloc(chip->num_regs, sizeof(unsigned int), + d->wake_buf = kcalloc(chip->num_regs, sizeof(*d->wake_buf), GFP_KERNEL); if (!d->wake_buf) goto err_alloc; } - num_type_reg = chip->type_in_mask ? chip->num_regs : chip->num_type_reg; - if (num_type_reg) { - d->type_buf_def = kcalloc(num_type_reg, - sizeof(unsigned int), GFP_KERNEL); + if (chip->type_in_mask) { + d->type_buf_def = kcalloc(chip->num_regs, + sizeof(*d->type_buf_def), GFP_KERNEL); if (!d->type_buf_def) goto err_alloc; - d->type_buf = kcalloc(num_type_reg, sizeof(unsigned int), - GFP_KERNEL); + d->type_buf = kcalloc(chip->num_regs, sizeof(*d->type_buf), GFP_KERNEL); if (!d->type_buf) goto err_alloc; } - if (chip->num_virt_regs) { + if (chip->num_config_bases && chip->num_config_regs) { /* - * Create virt_buf[chip->num_extra_config_regs][chip->num_regs] + * Create config_buf[num_config_bases][num_config_regs] */ - d->virt_buf = kcalloc(chip->num_virt_regs, sizeof(*d->virt_buf), - GFP_KERNEL); - if (!d->virt_buf) + d->config_buf = kcalloc(chip->num_config_bases, + sizeof(*d->config_buf), GFP_KERNEL); + if (!d->config_buf) goto err_alloc; - for (i = 0; i < chip->num_virt_regs; i++) { - d->virt_buf[i] = kcalloc(chip->num_regs, - sizeof(unsigned int), - GFP_KERNEL); - if (!d->virt_buf[i]) + for (i = 0; i < chip->num_config_bases; i++) { + d->config_buf[i] = kcalloc(chip->num_config_regs, + sizeof(**d->config_buf), + GFP_KERNEL); + if (!d->config_buf[i]) goto err_alloc; } } @@ -745,13 +789,12 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, else d->irq_reg_stride = 1; - if (chip->type_reg_stride) - d->type_reg_stride = chip->type_reg_stride; + if (chip->get_irq_reg) + d->get_irq_reg = chip->get_irq_reg; else - d->type_reg_stride = 1; + d->get_irq_reg = regmap_irq_get_irq_reg_linear; - if (!map->use_single_read && map->reg_stride == 1 && - d->irq_reg_stride == 1) { + if (regmap_irq_can_bulk_read_status(d)) { d->status_reg_buf = kmalloc_array(chip->num_regs, map->format.val_bytes, GFP_KERNEL); @@ -759,7 +802,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - mutex_init(&d->lock); + /* + * If one regmap-irq is the parent of another then we'll try + * to lock the child with the parent locked, use an explicit + * lock_key so lockdep can figure out what's going on. 
+ */ + lockdep_register_key(&d->lock_key); + mutex_init_with_key(&d->lock, &d->lock_key); for (i = 0; i < chip->num_irqs; i++) d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride] @@ -768,47 +817,60 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, /* Mask all the interrupts by default */ for (i = 0; i < chip->num_regs; i++) { d->mask_buf[i] = d->mask_buf_def[i]; - if (!chip->mask_base) - continue; - reg = sub_irq_reg(d, d->chip->mask_base, i); - - if (chip->mask_invert) - ret = regmap_irq_update_bits(d, reg, - d->mask_buf[i], ~d->mask_buf[i]); - else if (d->chip->unmask_base) { - unmask_offset = d->chip->unmask_base - - d->chip->mask_base; - ret = regmap_irq_update_bits(d, - reg + unmask_offset, - d->mask_buf[i], - d->mask_buf[i]); - } else - ret = regmap_irq_update_bits(d, reg, - d->mask_buf[i], d->mask_buf[i]); - if (ret != 0) { - dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", - reg, ret); - goto err_alloc; + if (chip->handle_mask_sync) { + ret = chip->handle_mask_sync(i, d->mask_buf_def[i], + d->mask_buf[i], + chip->irq_drv_data); + if (ret) + goto err_mutex; + } + + if (chip->mask_base && !chip->handle_mask_sync) { + reg = d->get_irq_reg(d, chip->mask_base, i); + ret = regmap_update_bits(d->map, reg, + d->mask_buf_def[i], + d->mask_buf[i]); + if (ret) { + dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", + reg, ret); + goto err_mutex; + } + } + + if (chip->unmask_base && !chip->handle_mask_sync) { + reg = d->get_irq_reg(d, chip->unmask_base, i); + ret = regmap_update_bits(d->map, reg, + d->mask_buf_def[i], ~d->mask_buf[i]); + if (ret) { + dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", + reg, ret); + goto err_mutex; + } } if (!chip->init_ack_masked) continue; /* Ack masked but set interrupts */ - reg = sub_irq_reg(d, d->chip->status_base, i); - ret = regmap_read(map, reg, &d->status_buf[i]); - if (ret != 0) { - dev_err(map->dev, "Failed to read IRQ status: %d\n", - ret); - goto err_alloc; + if (d->chip->no_status) { + /* no status register so default to all active */ + d->status_buf[i] = UINT_MAX; + } else { + reg = d->get_irq_reg(d, d->chip->status_base, i); + ret = regmap_read(map, reg, &d->status_buf[i]); + if (ret != 0) { + dev_err(map->dev, "Failed to read IRQ status: %d\n", + ret); + goto err_mutex; + } } if (chip->status_invert) d->status_buf[i] = ~d->status_buf[i]; if (d->status_buf[i] && (chip->ack_base || chip->use_ack)) { - reg = sub_irq_reg(d, d->chip->ack_base, i); + reg = d->get_irq_reg(d, d->chip->ack_base, i); if (chip->ack_invert) ret = regmap_write(map, reg, ~(d->status_buf[i] & d->mask_buf[i])); @@ -817,18 +879,14 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->status_buf[i] & d->mask_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - (d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~(d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, 0); } if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -837,53 +895,37 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (d->wake_buf) { for (i = 0; i < chip->num_regs; i++) { d->wake_buf[i] = d->mask_buf_def[i]; - reg = sub_irq_reg(d, d->chip->wake_base, i); + reg = d->get_irq_reg(d, d->chip->wake_base, i); if (chip->wake_invert) - ret = regmap_irq_update_bits(d, reg, + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], 0); else - ret = 
regmap_irq_update_bits(d, reg, + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], d->wake_buf[i]); if (ret != 0) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } - if (chip->num_type_reg && !chip->type_in_mask) { - for (i = 0; i < chip->num_type_reg; ++i) { - reg = sub_irq_reg(d, d->chip->type_base, i); - - ret = regmap_read(map, reg, &d->type_buf_def[i]); - - if (d->chip->type_invert) - d->type_buf_def[i] = ~d->type_buf_def[i]; + /* Store current levels */ + if (chip->status_is_level) { + ret = read_irq_data(d); + if (ret < 0) + goto err_mutex; - if (ret) { - dev_err(map->dev, "Failed to get type defaults at 0x%x: %d\n", - reg, ret); - goto err_alloc; - } - } + memcpy(d->prev_status_buf, d->status_buf, + array_size(d->chip->num_regs, sizeof(d->prev_status_buf[0]))); } - if (irq_base) - d->domain = irq_domain_create_legacy(fwnode, chip->num_irqs, - irq_base, 0, - &regmap_domain_ops, d); - else - d->domain = irq_domain_create_linear(fwnode, chip->num_irqs, - &regmap_domain_ops, d); - if (!d->domain) { - dev_err(map->dev, "Failed to create IRQ domain\n"); - ret = -ENOMEM; - goto err_alloc; - } + ret = regmap_irq_create_domain(fwnode, irq_base, chip, d); + if (ret) + goto err_mutex; ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags | IRQF_ONESHOT, @@ -900,18 +942,23 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, err_domain: /* Should really dispose of the domain but... */ +err_mutex: + mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_buf); + kfree(d->prev_status_buf); kfree(d->status_reg_buf); - if (d->virt_buf) { - for (i = 0; i < chip->num_virt_regs; i++) - kfree(d->virt_buf[i]); - kfree(d->virt_buf); + if (d->config_buf) { + for (i = 0; i < chip->num_config_bases; i++) + kfree(d->config_buf[i]); + kfree(d->config_buf); } kfree(d); return ret; @@ -953,7 +1000,7 @@ EXPORT_SYMBOL_GPL(regmap_add_irq_chip); void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) { unsigned int virq; - int hwirq; + int i, hwirq; if (!d) return; @@ -981,8 +1028,17 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_reg_buf); kfree(d->status_buf); + kfree(d->prev_status_buf); + if (d->config_buf) { + for (i = 0; i < d->chip->num_config_bases; i++) + kfree(d->config_buf[i]); + kfree(d->config_buf); + } + mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); @@ -1053,7 +1109,7 @@ int devm_regmap_add_irq_chip_fwnode(struct device *dev, EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip_fwnode); /** - * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip() + * devm_regmap_add_irq_chip() - Resource managed regmap_add_irq_chip() * * @dev: The device pointer on which irq_chip belongs to. * @map: The regmap for the device. @@ -1082,7 +1138,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip); /** * devm_regmap_del_irq_chip() - Resource managed regmap_del_irq_chip() * - * @dev: Device for which which resource was allocated. + * @dev: Device for which the resource was allocated. * @irq: Primary IRQ for the device. * @data: &regmap_irq_chip_data allocated by regmap_add_irq_chip(). 
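The new regmap-kunit.c file added below exercises these code paths across cache types, register layouts and fast_io settings. For orientation: each KUNIT_ARRAY_PARAM() invocation in that file generates a <name>_gen_params() helper that hands one struct regmap_test_param at a time to any test case registered with KUNIT_CASE_PARAM(), and param_to_desc() supplies the per-variant label printed in the test output. A minimal sketch of how those pieces are normally wired together (illustrative only; the case and suite names here are hypothetical, and the file's real suite definitions appear further down in the diff beyond the hunks quoted here):

#include <kunit/test.h>

static void example_param_case(struct kunit *test)
{
	const struct regmap_test_param *param = test->param_value;

	/* Each invocation receives one entry of the parameter array. */
	kunit_info(test, "cache %d, from_reg %#x, fast_io %d",
		   param->cache, param->from_reg, param->fast_io);
}

static struct kunit_case example_cases[] = {
	/* regcache_types_gen_params is generated by KUNIT_ARRAY_PARAM() */
	KUNIT_CASE_PARAM(example_param_case, regcache_types_gen_params),
	{}
};

static struct kunit_suite example_suite = {
	.name = "regmap-param-example",
	.test_cases = example_cases,
};
kunit_test_suite(example_suite);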
* diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c new file mode 100644 index 000000000000..f6fc5ed016da --- /dev/null +++ b/drivers/base/regmap/regmap-kunit.c @@ -0,0 +1,2131 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// regmap KUnit tests +// +// Copyright 2023 Arm Ltd + +#include <kunit/device.h> +#include <kunit/resource.h> +#include <kunit/test.h> +#include "internal.h" + +#define BLOCK_TEST_SIZE 12 + +KUNIT_DEFINE_ACTION_WRAPPER(regmap_exit_action, regmap_exit, struct regmap *); + +struct regmap_test_priv { + struct device *dev; +}; + +struct regmap_test_param { + enum regcache_type cache; + enum regmap_endian val_endian; + + unsigned int from_reg; + bool fast_io; +}; + +static void get_changed_bytes(void *orig, void *new, size_t size) +{ + char *o = orig; + char *n = new; + int i; + + get_random_bytes(new, size); + + /* + * This could be nicer and more efficient but we shouldn't + * super care. + */ + for (i = 0; i < size; i++) + while (n[i] == o[i]) + get_random_bytes(&n[i], 1); +} + +static const struct regmap_config test_regmap_config = { + .reg_stride = 1, + .val_bits = sizeof(unsigned int) * 8, +}; + +static const char *regcache_type_name(enum regcache_type type) +{ + switch (type) { + case REGCACHE_NONE: + return "none"; + case REGCACHE_FLAT: + return "flat"; + case REGCACHE_FLAT_S: + return "flat-sparse"; + case REGCACHE_RBTREE: + return "rbtree"; + case REGCACHE_MAPLE: + return "maple"; + default: + return NULL; + } +} + +static const char *regmap_endian_name(enum regmap_endian endian) +{ + switch (endian) { + case REGMAP_ENDIAN_BIG: + return "big"; + case REGMAP_ENDIAN_LITTLE: + return "little"; + case REGMAP_ENDIAN_DEFAULT: + return "default"; + case REGMAP_ENDIAN_NATIVE: + return "native"; + default: + return NULL; + } +} + +static void param_to_desc(const struct regmap_test_param *param, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s-%s%s @%#x", + regcache_type_name(param->cache), + regmap_endian_name(param->val_endian), + param->fast_io ? 
" fast I/O" : "", + param->from_reg); +} + +static const struct regmap_test_param regcache_types_list[] = { + { .cache = REGCACHE_NONE }, + { .cache = REGCACHE_NONE, .fast_io = true }, + { .cache = REGCACHE_FLAT }, + { .cache = REGCACHE_FLAT, .fast_io = true }, + { .cache = REGCACHE_FLAT_S }, + { .cache = REGCACHE_FLAT_S, .fast_io = true }, + { .cache = REGCACHE_RBTREE }, + { .cache = REGCACHE_RBTREE, .fast_io = true }, + { .cache = REGCACHE_MAPLE }, + { .cache = REGCACHE_MAPLE, .fast_io = true }, +}; + +KUNIT_ARRAY_PARAM(regcache_types, regcache_types_list, param_to_desc); + +static const struct regmap_test_param real_cache_types_only_list[] = { + { .cache = REGCACHE_FLAT }, + { .cache = REGCACHE_FLAT, .fast_io = true }, + { .cache = REGCACHE_FLAT_S }, + { .cache = REGCACHE_FLAT_S, .fast_io = true }, + { .cache = REGCACHE_RBTREE }, + { .cache = REGCACHE_RBTREE, .fast_io = true }, + { .cache = REGCACHE_MAPLE }, + { .cache = REGCACHE_MAPLE, .fast_io = true }, +}; + +KUNIT_ARRAY_PARAM(real_cache_types_only, real_cache_types_only_list, param_to_desc); + +static const struct regmap_test_param real_cache_types_list[] = { + { .cache = REGCACHE_FLAT, .from_reg = 0 }, + { .cache = REGCACHE_FLAT, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_FLAT, .from_reg = 0x2001 }, + { .cache = REGCACHE_FLAT, .from_reg = 0x2002 }, + { .cache = REGCACHE_FLAT, .from_reg = 0x2003 }, + { .cache = REGCACHE_FLAT, .from_reg = 0x2004 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2001 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2002 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2003 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2004 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2001 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2002 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2003 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2004 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2001 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2002 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2003 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2004 }, +}; + +KUNIT_ARRAY_PARAM(real_cache_types, real_cache_types_list, param_to_desc); + +static const struct regmap_test_param sparse_cache_types_list[] = { + { .cache = REGCACHE_FLAT_S, .from_reg = 0 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2001 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2002 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2003 }, + { .cache = REGCACHE_FLAT_S, .from_reg = 0x2004 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2001 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2002 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2003 }, + { .cache = REGCACHE_RBTREE, .from_reg = 0x2004 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0, .fast_io = true }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2001 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2002 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2003 }, + { .cache = REGCACHE_MAPLE, .from_reg = 0x2004 }, +}; + +KUNIT_ARRAY_PARAM(sparse_cache_types, 
sparse_cache_types_list, param_to_desc); + +static struct regmap *gen_regmap(struct kunit *test, + struct regmap_config *config, + struct regmap_ram_data **data) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap_test_priv *priv = test->priv; + unsigned int *buf; + struct regmap *ret = ERR_PTR(-ENOMEM); + size_t size; + int i, error; + struct reg_default *defaults; + + config->cache_type = param->cache; + config->fast_io = param->fast_io; + + if (config->max_register == 0) { + config->max_register = param->from_reg; + if (config->num_reg_defaults) + config->max_register += (config->num_reg_defaults - 1) * + config->reg_stride; + else + config->max_register += (BLOCK_TEST_SIZE * config->reg_stride); + } + + size = array_size(config->max_register + 1, sizeof(*buf)); + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + get_random_bytes(buf, size); + + *data = kzalloc(sizeof(**data), GFP_KERNEL); + if (!(*data)) + goto out_free; + (*data)->vals = buf; + + if (config->num_reg_defaults) { + defaults = kunit_kcalloc(test, + config->num_reg_defaults, + sizeof(struct reg_default), + GFP_KERNEL); + if (!defaults) + goto out_free; + + config->reg_defaults = defaults; + + for (i = 0; i < config->num_reg_defaults; i++) { + defaults[i].reg = param->from_reg + (i * config->reg_stride); + defaults[i].def = buf[param->from_reg + (i * config->reg_stride)]; + } + } + + ret = regmap_init_ram(priv->dev, config, *data); + if (IS_ERR(ret)) + goto out_free; + + /* This calls regmap_exit() on failure, which frees buf and *data */ + error = kunit_add_action_or_reset(test, regmap_exit_action, ret); + if (error) + ret = ERR_PTR(error); + + return ret; + +out_free: + kfree(buf); + kfree(*data); + + return ret; +} + +static bool reg_5_false(struct device *dev, unsigned int reg) +{ + struct kunit *test = dev_get_drvdata(dev); + const struct regmap_test_param *param = test->param_value; + + return reg != (param->from_reg + 5); +} + +static void basic_read_write(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val, rval; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* If we write a value to a register we can read it back */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 0, val)); + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 0, &rval)); + KUNIT_EXPECT_EQ(test, val, rval); + + /* If using a cache the cache satisfied the read */ + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[0]); +} + +static void bulk_write(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE], rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* + * Data written via the bulk API can be read back with single + * reads. 
+ */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, 0, val, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval[i])); + + KUNIT_EXPECT_MEMEQ(test, val, rval, sizeof(val)); + + /* If using a cache the cache satisfied the read */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void bulk_read(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE], rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* Data written as single writes can be read via the bulk API */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, val[i])); + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, 0, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, val, rval, sizeof(val)); + + /* If using a cache the cache satisfied the read */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void multi_write(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + struct reg_sequence sequence[BLOCK_TEST_SIZE]; + unsigned int val[BLOCK_TEST_SIZE], rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* + * Data written via the multi API can be read back with single + * reads. 
+ */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + sequence[i].reg = i; + sequence[i].def = val[i]; + sequence[i].delay_us = 0; + } + KUNIT_EXPECT_EQ(test, 0, + regmap_multi_reg_write(map, sequence, BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval[i])); + + KUNIT_EXPECT_MEMEQ(test, val, rval, sizeof(val)); + + /* If using a cache the cache satisfied the read */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void multi_read(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int regs[BLOCK_TEST_SIZE]; + unsigned int val[BLOCK_TEST_SIZE], rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* Data written as single writes can be read via the multi API */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + regs[i] = i; + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, val[i])); + } + KUNIT_EXPECT_EQ(test, 0, + regmap_multi_reg_read(map, regs, rval, BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, val, rval, sizeof(val)); + + /* If using a cache the cache satisfied the read */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void read_bypassed(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE], rval; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + + get_random_bytes(&val, sizeof(val)); + + /* Write some test values */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, param->from_reg, val, ARRAY_SIZE(val))); + + regcache_cache_only(map, true); + + /* + * While in cache-only regmap_read_bypassed() should return the register + * value and leave the map in cache-only. + */ + for (i = 0; i < ARRAY_SIZE(val); i++) { + /* Put inverted bits in rval to prove we really read the value */ + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_EQ(test, val[i], rval); + + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read_bypassed(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_EQ(test, val[i], rval); + KUNIT_EXPECT_TRUE(test, map->cache_only); + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + } + + /* + * Change the underlying register values to prove it is returning + * real values not cached values. 
+ */ + for (i = 0; i < ARRAY_SIZE(val); i++) { + val[i] = ~val[i]; + data->vals[param->from_reg + i] = val[i]; + } + + for (i = 0; i < ARRAY_SIZE(val); i++) { + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_NE(test, val[i], rval); + + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read_bypassed(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_EQ(test, val[i], rval); + KUNIT_EXPECT_TRUE(test, map->cache_only); + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + } +} + +static void read_bypassed_volatile(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE], rval; + int i; + + config = test_regmap_config; + /* All registers except #5 volatile */ + config.volatile_reg = reg_5_false; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + + get_random_bytes(&val, sizeof(val)); + + /* Write some test values */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, param->from_reg, val, ARRAY_SIZE(val))); + + regcache_cache_only(map, true); + + /* + * While in cache-only regmap_read_bypassed() should return the register + * value and leave the map in cache-only. + */ + for (i = 0; i < ARRAY_SIZE(val); i++) { + /* Register #5 is non-volatile so should read from cache */ + KUNIT_EXPECT_EQ(test, (i == 5) ? 0 : -EBUSY, + regmap_read(map, param->from_reg + i, &rval)); + + /* Put inverted bits in rval to prove we really read the value */ + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read_bypassed(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_EQ(test, val[i], rval); + KUNIT_EXPECT_TRUE(test, map->cache_only); + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + } + + /* + * Change the underlying register values to prove it is returning + * real values not cached values. + */ + for (i = 0; i < ARRAY_SIZE(val); i++) { + val[i] = ~val[i]; + data->vals[param->from_reg + i] = val[i]; + } + + for (i = 0; i < ARRAY_SIZE(val); i++) { + if (i == 5) + continue; + + rval = ~val[i]; + KUNIT_EXPECT_EQ(test, 0, regmap_read_bypassed(map, param->from_reg + i, &rval)); + KUNIT_EXPECT_EQ(test, val[i], rval); + KUNIT_EXPECT_TRUE(test, map->cache_only); + KUNIT_EXPECT_FALSE(test, map->cache_bypass); + } +} + +static void write_readonly(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + config.writeable_reg = reg_5_false; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[i] = false; + + /* Change the value of all registers, readonly should fail */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, i != 5, regmap_write(map, i, val) == 0); + + /* Did that match what we see on the device? 
*/ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, i != 5, data->written[i]); +} + +static void read_writeonly(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.readable_reg = reg_5_false; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[i] = false; + + /* + * Try to read all the registers, the writeonly one should + * fail if we aren't using the flat cache. + */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + if (config.cache_type != REGCACHE_FLAT) { + KUNIT_EXPECT_EQ(test, i != 5, + regmap_read(map, i, &val) == 0); + } else { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &val)); + } + } + + /* Did we trigger a hardware access? */ + KUNIT_EXPECT_FALSE(test, data->read[5]); +} + +static void reg_defaults(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Read back the expected default data */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, 0, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, data->vals, rval, sizeof(rval)); + + /* The data should have been read from cache if there was one */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void reg_defaults_read_dev(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + config.num_reg_defaults_raw = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* We should have read the cache defaults back from the map */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + KUNIT_EXPECT_EQ(test, config.cache_type != REGCACHE_NONE, data->read[i]); + data->read[i] = false; + } + + /* Read back the expected default data */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, 0, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, data->vals, rval, sizeof(rval)); + + /* The data should have been read from cache if there was one */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); +} + +static void register_patch(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + struct reg_sequence patch[2]; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + /* We need defaults so readback works */ + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Stash the original values */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, 0, rval, + BLOCK_TEST_SIZE)); + + /* Patch a couple of values */ + patch[0].reg = 2; + patch[0].def = rval[2] + 1; + patch[0].delay_us = 0; + patch[1].reg = 5; + patch[1].def = rval[5] + 1; + patch[1].delay_us = 0; + KUNIT_EXPECT_EQ(test, 0, regmap_register_patch(map, patch, + ARRAY_SIZE(patch))); + + /* Only the patched registers are written 
*/ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + switch (i) { + case 2: + case 5: + KUNIT_EXPECT_TRUE(test, data->written[i]); + KUNIT_EXPECT_EQ(test, data->vals[i], rval[i] + 1); + break; + default: + KUNIT_EXPECT_FALSE(test, data->written[i]); + KUNIT_EXPECT_EQ(test, data->vals[i], rval[i]); + break; + } + } +} + +static void stride(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval; + int i; + + config = test_regmap_config; + config.reg_stride = 2; + config.num_reg_defaults = BLOCK_TEST_SIZE / 2; + + /* + * Allow one extra register so that the read/written arrays + * are sized big enough to include an entry for the odd + * address past the final reg_default register. + */ + config.max_register = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Only even addresses can be accessed, try both read and write */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + data->read[i] = false; + data->written[i] = false; + + if (i % 2) { + KUNIT_EXPECT_NE(test, 0, regmap_read(map, i, &rval)); + KUNIT_EXPECT_NE(test, 0, regmap_write(map, i, rval)); + KUNIT_EXPECT_FALSE(test, data->read[i]); + KUNIT_EXPECT_FALSE(test, data->written[i]); + } else { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval)); + KUNIT_EXPECT_EQ(test, data->vals[i], rval); + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, + data->read[i]); + + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, rval)); + KUNIT_EXPECT_TRUE(test, data->written[i]); + } + } +} + +static const struct regmap_range_cfg test_range = { + .selector_reg = 1, + .selector_mask = 0xff, + + .window_start = 4, + .window_len = 10, + + .range_min = 20, + .range_max = 40, +}; + +static bool test_range_window_volatile(struct device *dev, unsigned int reg) +{ + if (reg >= test_range.window_start && + reg <= test_range.window_start + test_range.window_len) + return true; + + return false; +} + +static bool test_range_all_volatile(struct device *dev, unsigned int reg) +{ + if (test_range_window_volatile(dev, reg)) + return true; + + if (reg >= test_range.range_min && reg <= test_range.range_max) + return true; + + return false; +} + +static void basic_ranges(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.volatile_reg = test_range_all_volatile; + config.ranges = &test_range; + config.num_ranges = 1; + config.max_register = test_range.range_max; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + for (i = test_range.range_min; i < test_range.range_max; i++) { + data->read[i] = false; + data->written[i] = false; + } + + /* Reset the page to a non-zero value to trigger a change */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, test_range.selector_reg, + test_range.range_max)); + + /* Check we set the page and use the window for writes */ + data->written[test_range.selector_reg] = false; + data->written[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, test_range.range_min, 0)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->written[test_range.window_start]); + + data->written[test_range.selector_reg] = false; + data->written[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, + test_range.range_min + + 
test_range.window_len, + 0)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->written[test_range.window_start]); + + /* Same for reads */ + data->written[test_range.selector_reg] = false; + data->read[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, test_range.range_min, &val)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->read[test_range.window_start]); + + data->written[test_range.selector_reg] = false; + data->read[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, + test_range.range_min + + test_range.window_len, + &val)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->read[test_range.window_start]); + + /* No physical access triggered in the virtual range */ + for (i = test_range.range_min; i < test_range.range_max; i++) { + KUNIT_EXPECT_FALSE(test, data->read[i]); + KUNIT_EXPECT_FALSE(test, data->written[i]); + } +} + +/* Try to stress dynamic creation of cache data structures */ +static void stress_insert(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval, *vals; + size_t buf_sz; + int i; + + config = test_regmap_config; + config.max_register = 300; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + buf_sz = array_size(sizeof(*vals), config.max_register); + vals = kunit_kmalloc(test, buf_sz, GFP_KERNEL); + KUNIT_ASSERT_FALSE(test, vals == NULL); + + get_random_bytes(vals, buf_sz); + + /* Write data into the map/cache in ever decreasing strides */ + for (i = 0; i < config.max_register; i += 100) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 50) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 25) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 10) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 5) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 3) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i += 2) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + for (i = 0; i < config.max_register; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, i, vals[i])); + + /* Do reads from the cache (if there is one) match? 
*/ + for (i = 0; i < config.max_register; i ++) { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval)); + KUNIT_EXPECT_EQ(test, rval, vals[i]); + KUNIT_EXPECT_EQ(test, config.cache_type == REGCACHE_NONE, data->read[i]); + } +} + +static void cache_bypass(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val, rval; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* Ensure the cache has a value in it */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg, val)); + + /* Bypass then write a different value */ + regcache_cache_bypass(map, true); + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg, val + 1)); + + /* Read the bypassed value */ + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg, &rval)); + KUNIT_EXPECT_EQ(test, val + 1, rval); + KUNIT_EXPECT_EQ(test, data->vals[param->from_reg], rval); + + /* Disable bypass, the cache should still return the original value */ + regcache_cache_bypass(map, false); + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg, &rval)); + KUNIT_EXPECT_EQ(test, val, rval); +} + +static void cache_sync_marked_dirty(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* Put some data into the cache */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, param->from_reg, val, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + /* Trash the data on the device itself then resync */ + regcache_mark_dirty(map); + memset(data->vals, 0, sizeof(val)); + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Did we just write the correct data out? 
*/ + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], val, sizeof(val)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, true, data->written[param->from_reg + i]); +} + +static void cache_sync_after_cache_only(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[BLOCK_TEST_SIZE]; + unsigned int val_mask; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + val_mask = GENMASK(config.val_bits - 1, 0); + get_random_bytes(&val, sizeof(val)); + + /* Put some data into the cache */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, param->from_reg, val, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + /* Set cache-only and change the values */ + regcache_cache_only(map, true); + for (i = 0; i < ARRAY_SIZE(val); ++i) + val[i] = ~val[i] & val_mask; + + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, param->from_reg, val, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_FALSE(test, data->written[param->from_reg + i]); + + KUNIT_EXPECT_MEMNEQ(test, &data->vals[param->from_reg], val, sizeof(val)); + + /* Exit cache-only and sync the cache without marking hardware registers dirty */ + regcache_cache_only(map, false); + + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Did we just write the correct data out? */ + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], val, sizeof(val)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_TRUE(test, data->written[param->from_reg + i]); +} + +static void cache_sync_defaults_marked_dirty(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* Change the value of one register */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + 2, val)); + + /* Resync */ + regcache_mark_dirty(map); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Did we just sync the one register we touched? 
*/ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, i == 2, data->written[param->from_reg + i]); + + /* Rewrite registers back to their defaults */ + for (i = 0; i < config.num_reg_defaults; ++i) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, config.reg_defaults[i].reg, + config.reg_defaults[i].def)); + + /* + * Resync after regcache_mark_dirty() should not write out registers + * that are at default value + */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + regcache_mark_dirty(map); + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_FALSE(test, data->written[param->from_reg + i]); +} + +static void cache_sync_default_after_cache_only(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int orig_val; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + 2, &orig_val)); + + /* Enter cache-only and change the value of one register */ + regcache_cache_only(map, true); + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + 2, orig_val + 1)); + + /* Exit cache-only and resync, should write out the changed register */ + regcache_cache_only(map, false); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Was the register written out? */ + KUNIT_EXPECT_TRUE(test, data->written[param->from_reg + 2]); + KUNIT_EXPECT_EQ(test, data->vals[param->from_reg + 2], orig_val + 1); + + /* Enter cache-only and write register back to its default value */ + regcache_cache_only(map, true); + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + 2, orig_val)); + + /* Resync should write out the new value */ + regcache_cache_only(map, false); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + KUNIT_EXPECT_TRUE(test, data->written[param->from_reg + 2]); + KUNIT_EXPECT_EQ(test, data->vals[param->from_reg + 2], orig_val); +} + +static void cache_sync_readonly(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.writeable_reg = reg_5_false; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Read all registers to fill the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + i, &val)); + + /* Change the value of all registers, readonly should fail */ + get_random_bytes(&val, sizeof(val)); + regcache_cache_only(map, true); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, i != 5, regmap_write(map, param->from_reg + i, val) == 0); + regcache_cache_only(map, false); + + /* Resync */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Did that match what we see on the device? 
*/ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, i != 5, data->written[param->from_reg + i]); +} + +static void cache_sync_patch(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + struct reg_sequence patch[2]; + unsigned int rval[BLOCK_TEST_SIZE], val; + int i; + + /* We need defaults so readback works */ + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Stash the original values */ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + + /* Patch a couple of values */ + patch[0].reg = param->from_reg + 2; + patch[0].def = rval[2] + 1; + patch[0].delay_us = 0; + patch[1].reg = param->from_reg + 5; + patch[1].def = rval[5] + 1; + patch[1].delay_us = 0; + KUNIT_EXPECT_EQ(test, 0, regmap_register_patch(map, patch, + ARRAY_SIZE(patch))); + + /* Sync the cache */ + regcache_mark_dirty(map); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* The patch should be on the device but not in the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + i, &val)); + KUNIT_EXPECT_EQ(test, val, rval[i]); + + switch (i) { + case 2: + case 5: + KUNIT_EXPECT_EQ(test, true, data->written[param->from_reg + i]); + KUNIT_EXPECT_EQ(test, data->vals[param->from_reg + i], rval[i] + 1); + break; + default: + KUNIT_EXPECT_EQ(test, false, data->written[param->from_reg + i]); + KUNIT_EXPECT_EQ(test, data->vals[param->from_reg + i], rval[i]); + break; + } + } +} + +static void cache_drop(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Ensure the data is read from the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) { + KUNIT_EXPECT_FALSE(test, data->read[param->from_reg + i]); + data->read[param->from_reg + i] = false; + } + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], rval, sizeof(rval)); + + /* Drop some registers */ + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, param->from_reg + 3, + param->from_reg + 5)); + + /* Reread and check only the dropped registers hit the device. 
*/ + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, data->read[param->from_reg + i], i >= 3 && i <= 5); + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], rval, sizeof(rval)); +} + +static void cache_drop_with_non_contiguous_ranges(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val[4][BLOCK_TEST_SIZE]; + unsigned int reg; + const int num_ranges = ARRAY_SIZE(val) * 2; + int rangeidx, i; + + static_assert(ARRAY_SIZE(val) == 4); + + config = test_regmap_config; + config.max_register = param->from_reg + (num_ranges * BLOCK_TEST_SIZE); + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + for (i = 0; i < config.max_register + 1; i++) + data->written[i] = false; + + /* Create non-contiguous cache blocks by writing every other range */ + get_random_bytes(&val, sizeof(val)); + for (rangeidx = 0; rangeidx < num_ranges; rangeidx += 2) { + reg = param->from_reg + (rangeidx * BLOCK_TEST_SIZE); + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_write(map, reg, + &val[rangeidx / 2], + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, &data->vals[reg], + &val[rangeidx / 2], sizeof(val[rangeidx / 2])); + } + + /* Check that odd ranges weren't written */ + for (rangeidx = 1; rangeidx < num_ranges; rangeidx += 2) { + reg = param->from_reg + (rangeidx * BLOCK_TEST_SIZE); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_FALSE(test, data->written[reg + i]); + } + + /* Drop range 2 */ + reg = param->from_reg + (2 * BLOCK_TEST_SIZE); + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, reg, reg + BLOCK_TEST_SIZE - 1)); + + /* Drop part of range 4 */ + reg = param->from_reg + (4 * BLOCK_TEST_SIZE); + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, reg + 3, reg + 5)); + + /* Mark dirty and reset mock registers to 0 */ + regcache_mark_dirty(map); + for (i = 0; i < config.max_register + 1; i++) { + data->vals[i] = 0; + data->written[i] = false; + } + + /* The registers that were dropped from range 4 should now remain at 0 */ + val[4 / 2][3] = 0; + val[4 / 2][4] = 0; + val[4 / 2][5] = 0; + + /* Sync and check that the expected register ranges were written */ + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* Check that odd ranges weren't written */ + for (rangeidx = 1; rangeidx < num_ranges; rangeidx += 2) { + reg = param->from_reg + (rangeidx * BLOCK_TEST_SIZE); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_FALSE(test, data->written[reg + i]); + } + + /* Check that even ranges (except 2 and 4) were written */ + for (rangeidx = 0; rangeidx < num_ranges; rangeidx += 2) { + if ((rangeidx == 2) || (rangeidx == 4)) + continue; + + reg = param->from_reg + (rangeidx * BLOCK_TEST_SIZE); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_TRUE(test, data->written[reg + i]); + + KUNIT_EXPECT_MEMEQ(test, &data->vals[reg], + &val[rangeidx / 2], sizeof(val[rangeidx / 2])); + } + + /* Check that range 2 wasn't written */ + reg = param->from_reg + (2 * BLOCK_TEST_SIZE); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_FALSE(test, data->written[reg + i]); + + /* Check that range 4 was partially written */ + reg = param->from_reg + (4 * BLOCK_TEST_SIZE); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, data->written[reg + i], i < 3 || i > 5); + + KUNIT_EXPECT_MEMEQ(test, &data->vals[reg], 
&val[4 / 2], sizeof(val[4 / 2])); + + /* Nothing before param->from_reg should have been written */ + for (i = 0; i < param->from_reg; i++) + KUNIT_EXPECT_FALSE(test, data->written[i]); +} + +static void cache_drop_all_and_sync_marked_dirty(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Ensure the data is read from the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], rval, sizeof(rval)); + + /* Change all values in cache from defaults */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + i, rval[i] + 1)); + + /* Drop all registers */ + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, 0, config.max_register)); + + /* Mark dirty and cache sync should not write anything. */ + regcache_mark_dirty(map); + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + for (i = 0; i <= config.max_register; i++) + KUNIT_EXPECT_FALSE(test, data->written[i]); +} + +static void cache_drop_all_and_sync_no_defaults(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Ensure the data is read from the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], rval, sizeof(rval)); + + /* Change all values in cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + i, rval[i] + 1)); + + /* Drop all registers */ + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, 0, config.max_register)); + + /* + * Sync cache without marking it dirty. All registers were dropped + * so the cache should not have any entries to write out. 
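[Editorial aside, not part of the patch] The cache_drop_all_and_sync_* tests around this point verify that regcache_drop_region() really forgets cached values, so a following regcache_sync() has nothing left to write back. As a rough driver-side illustration of the pattern being tested (struct my_chip, MY_CHIP_CTRL0/MY_CHIP_CTRL7 and the surrounding driver are hypothetical; only the regcache_* calls are the real API):

	#include <linux/regmap.h>

	struct my_chip { struct regmap *regmap; };	/* hypothetical driver data */

	static int my_chip_handle_reset(struct my_chip *chip)
	{
		int ret;

		/* A hardware reset reverted MY_CHIP_CTRL0..MY_CHIP_CTRL7, so the
		 * cached values for that block are stale: forget them. */
		ret = regcache_drop_region(chip->regmap, MY_CHIP_CTRL0, MY_CHIP_CTRL7);
		if (ret)
			return ret;

		/* Dropped registers are simply re-read from the device on next
		 * access; anything still cached elsewhere is written back here. */
		return regcache_sync(chip->regmap);
	}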
+ */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + for (i = 0; i <= config.max_register; i++) + KUNIT_EXPECT_FALSE(test, data->written[i]); +} + +static void cache_drop_all_and_sync_has_defaults(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval[BLOCK_TEST_SIZE]; + int i; + + config = test_regmap_config; + config.num_reg_defaults = BLOCK_TEST_SIZE; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Ensure the data is read from the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_bulk_read(map, param->from_reg, rval, + BLOCK_TEST_SIZE)); + KUNIT_EXPECT_MEMEQ(test, &data->vals[param->from_reg], rval, sizeof(rval)); + + /* Change all values in cache from defaults */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, param->from_reg + i, rval[i] + 1)); + + /* Drop all registers */ + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, 0, config.max_register)); + + /* + * Sync cache without marking it dirty. All registers were dropped + * so the cache should not have any entries to write out. + */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->written[param->from_reg + i] = false; + + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + for (i = 0; i <= config.max_register; i++) + KUNIT_EXPECT_FALSE(test, data->written[i]); +} + +static void cache_present(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + + /* No defaults so no registers cached. */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_ASSERT_FALSE(test, regcache_reg_cached(map, param->from_reg + i)); + + /* We didn't trigger any reads */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_ASSERT_FALSE(test, data->read[param->from_reg + i]); + + /* Fill the cache */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, param->from_reg + i, &val)); + + /* Now everything should be cached */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_ASSERT_TRUE(test, regcache_reg_cached(map, param->from_reg + i)); +} + +static void cache_write_zero(struct kunit *test) +{ + const struct regmap_test_param *param = test->param_value; + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + for (i = 0; i < BLOCK_TEST_SIZE; i++) + data->read[param->from_reg + i] = false; + + /* No defaults so no registers cached. 
*/ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_ASSERT_FALSE(test, regcache_reg_cached(map, param->from_reg + i)); + + /* We didn't trigger any reads */ + for (i = 0; i < BLOCK_TEST_SIZE; i++) + KUNIT_ASSERT_FALSE(test, data->read[param->from_reg + i]); + + /* Write a zero value */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 1, 0)); + + /* Read that zero value back */ + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 1, &val)); + KUNIT_EXPECT_EQ(test, 0, val); + + /* From the cache? */ + KUNIT_ASSERT_TRUE(test, regcache_reg_cached(map, 1)); + + /* Try to throw it away */ + KUNIT_EXPECT_EQ(test, 0, regcache_drop_region(map, 1, 1)); + KUNIT_ASSERT_FALSE(test, regcache_reg_cached(map, 1)); +} + +/* Check that caching the window register works with sync */ +static void cache_range_window_reg(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = test_regmap_config; + config.volatile_reg = test_range_window_volatile; + config.ranges = &test_range; + config.num_ranges = 1; + config.max_register = test_range.range_max; + + map = gen_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Write new values to the entire range */ + for (i = test_range.range_min; i <= test_range.range_max; i++) + KUNIT_ASSERT_EQ(test, 0, regmap_write(map, i, 0)); + + val = data->vals[test_range.selector_reg] & test_range.selector_mask; + KUNIT_ASSERT_EQ(test, val, 2); + + /* Write to the first register in the range to reset the page */ + KUNIT_ASSERT_EQ(test, 0, regmap_write(map, test_range.range_min, 0)); + val = data->vals[test_range.selector_reg] & test_range.selector_mask; + KUNIT_ASSERT_EQ(test, val, 0); + + /* Trigger a cache sync */ + regcache_mark_dirty(map); + KUNIT_ASSERT_EQ(test, 0, regcache_sync(map)); + + /* Write to the first register again, the page should be reset */ + KUNIT_ASSERT_EQ(test, 0, regmap_write(map, test_range.range_min, 0)); + val = data->vals[test_range.selector_reg] & test_range.selector_mask; + KUNIT_ASSERT_EQ(test, val, 0); + + /* Trigger another cache sync */ + regcache_mark_dirty(map); + KUNIT_ASSERT_EQ(test, 0, regcache_sync(map)); + + /* Write to the last register again, the page should be reset */ + KUNIT_ASSERT_EQ(test, 0, regmap_write(map, test_range.range_max, 0)); + val = data->vals[test_range.selector_reg] & test_range.selector_mask; + KUNIT_ASSERT_EQ(test, val, 2); +} + +static const struct regmap_test_param raw_types_list[] = { + { .cache = REGCACHE_NONE, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_NONE, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_BIG }, +}; + +KUNIT_ARRAY_PARAM(raw_test_types, raw_types_list, param_to_desc); + +static const struct regmap_test_param raw_cache_types_list[] = { + { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_FLAT_S, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_FLAT_S, 
.val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_RBTREE, .val_endian = REGMAP_ENDIAN_BIG }, + { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_LITTLE }, + { .cache = REGCACHE_MAPLE, .val_endian = REGMAP_ENDIAN_BIG }, +}; + +KUNIT_ARRAY_PARAM(raw_test_cache_types, raw_cache_types_list, param_to_desc); + +static const struct regmap_config raw_regmap_config = { + .max_register = BLOCK_TEST_SIZE, + + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .reg_bits = 16, + .val_bits = 16, +}; + +static struct regmap *gen_raw_regmap(struct kunit *test, + struct regmap_config *config, + struct regmap_ram_data **data) +{ + struct regmap_test_priv *priv = test->priv; + const struct regmap_test_param *param = test->param_value; + u16 *buf; + struct regmap *ret = ERR_PTR(-ENOMEM); + int i, error; + struct reg_default *defaults; + size_t size; + + config->cache_type = param->cache; + config->val_format_endian = param->val_endian; + config->disable_locking = config->cache_type == REGCACHE_RBTREE || + config->cache_type == REGCACHE_MAPLE; + + size = array_size(config->max_register + 1, BITS_TO_BYTES(config->reg_bits)); + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + get_random_bytes(buf, size); + + *data = kzalloc(sizeof(**data), GFP_KERNEL); + if (!(*data)) + goto out_free; + (*data)->vals = (void *)buf; + + config->num_reg_defaults = config->max_register + 1; + defaults = kunit_kcalloc(test, + config->num_reg_defaults, + sizeof(struct reg_default), + GFP_KERNEL); + if (!defaults) + goto out_free; + config->reg_defaults = defaults; + + for (i = 0; i < config->num_reg_defaults; i++) { + defaults[i].reg = i; + switch (param->val_endian) { + case REGMAP_ENDIAN_LITTLE: + defaults[i].def = le16_to_cpu(buf[i]); + break; + case REGMAP_ENDIAN_BIG: + defaults[i].def = be16_to_cpu(buf[i]); + break; + default: + ret = ERR_PTR(-EINVAL); + goto out_free; + } + } + + /* + * We use the defaults in the tests but they don't make sense + * to the core if there's no cache. 
+ */ + if (config->cache_type == REGCACHE_NONE) + config->num_reg_defaults = 0; + + ret = regmap_init_raw_ram(priv->dev, config, *data); + if (IS_ERR(ret)) + goto out_free; + + /* This calls regmap_exit() on failure, which frees buf and *data */ + error = kunit_add_action_or_reset(test, regmap_exit_action, ret); + if (error) + ret = ERR_PTR(error); + + return ret; + +out_free: + kfree(buf); + kfree(*data); + + return ret; +} + +static void raw_read_defaults_single(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int rval; + int i; + + config = raw_regmap_config; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Check that we can read the defaults via the API */ + for (i = 0; i < config.max_register + 1; i++) { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval)); + KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval); + } +} + +static void raw_read_defaults(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + u16 *rval; + u16 def; + size_t val_len; + int i; + + config = raw_regmap_config; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + val_len = array_size(sizeof(*rval), config.max_register + 1); + rval = kunit_kmalloc(test, val_len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, rval != NULL); + if (!rval) + return; + + /* Check that we can read the defaults via the API */ + KUNIT_EXPECT_EQ(test, 0, regmap_raw_read(map, 0, rval, val_len)); + for (i = 0; i < config.max_register + 1; i++) { + def = config.reg_defaults[i].def; + if (config.val_format_endian == REGMAP_ENDIAN_BIG) { + KUNIT_EXPECT_EQ(test, def, be16_to_cpu((__force __be16)rval[i])); + } else { + KUNIT_EXPECT_EQ(test, def, le16_to_cpu((__force __le16)rval[i])); + } + } +} + +static void raw_write_read_single(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + u16 val; + unsigned int rval; + + config = raw_regmap_config; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + get_random_bytes(&val, sizeof(val)); + + /* If we write a value to a register we can read it back */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 0, val)); + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 0, &rval)); + KUNIT_EXPECT_EQ(test, val, rval); +} + +static void raw_write(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + u16 *hw_buf; + u16 val[2]; + unsigned int rval; + int i; + + config = raw_regmap_config; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + hw_buf = (u16 *)data->vals; + + get_random_bytes(&val, sizeof(val)); + + /* Do a raw write */ + KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val))); + + /* We should read back the new values, and defaults for the rest */ + for (i = 0; i < config.max_register + 1; i++) { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval)); + + switch (i) { + case 2: + case 3: + if (config.val_format_endian == REGMAP_ENDIAN_BIG) { + KUNIT_EXPECT_EQ(test, rval, + be16_to_cpu((__force __be16)val[i % 2])); + } else { + KUNIT_EXPECT_EQ(test, rval, + le16_to_cpu((__force __le16)val[i % 2])); + } + break; + default: + KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval); + break; + } 
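[Editorial aside, not part of the patch] The readback loop above converts the raw test values with be16_to_cpu()/le16_to_cpu() because regmap_read() always returns CPU-endian integers, while regmap_raw_write()/regmap_raw_read() buffers stay in the device's val_format_endian byte order. A minimal sketch of that contract, assuming a 16-bit big-endian map; the register address and value are arbitrary:

	static int demo_raw_vs_int(struct regmap *map)
	{
		__be16 raw = cpu_to_be16(0x1234);
		unsigned int val;
		int ret;

		/* The raw buffer is in device (val_format_endian) byte order... */
		ret = regmap_raw_write(map, 0x02, &raw, sizeof(raw));
		if (ret)
			return ret;

		/* ...but regmap_read() always hands back a CPU-order integer. */
		ret = regmap_read(map, 0x02, &val);
		if (ret)
			return ret;

		return val == 0x1234 ? 0 : -EIO;	/* holds for the big-endian case */
	}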
+ } + + /* The values should appear in the "hardware" */ + KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val)); +} + +static bool reg_zero(struct device *dev, unsigned int reg) +{ + return reg == 0; +} + +static bool ram_reg_zero(struct regmap_ram_data *data, unsigned int reg) +{ + return reg == 0; +} + +static void raw_noinc_write(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + u16 val_test, val_last; + u16 val_array[BLOCK_TEST_SIZE]; + + config = raw_regmap_config; + config.volatile_reg = reg_zero; + config.writeable_noinc_reg = reg_zero; + config.readable_noinc_reg = reg_zero; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + data->noinc_reg = ram_reg_zero; + + get_random_bytes(&val_array, sizeof(val_array)); + + if (config.val_format_endian == REGMAP_ENDIAN_BIG) { + val_test = be16_to_cpu(val_array[1]) + 100; + val_last = be16_to_cpu(val_array[BLOCK_TEST_SIZE - 1]); + } else { + val_test = le16_to_cpu(val_array[1]) + 100; + val_last = le16_to_cpu(val_array[BLOCK_TEST_SIZE - 1]); + } + + /* Put some data into the register following the noinc register */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 1, val_test)); + + /* Write some data to the noinc register */ + KUNIT_EXPECT_EQ(test, 0, regmap_noinc_write(map, 0, val_array, + sizeof(val_array))); + + /* We should read back the last value written */ + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 0, &val)); + KUNIT_ASSERT_EQ(test, val_last, val); + + /* Make sure we didn't touch the register after the noinc register */ + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 1, &val)); + KUNIT_ASSERT_EQ(test, val_test, val); +} + +static void raw_sync(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + u16 val[3]; + u16 *hw_buf; + unsigned int rval; + int i; + + config = raw_regmap_config; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + hw_buf = (u16 *)data->vals; + + get_changed_bytes(&hw_buf[2], &val[0], sizeof(val)); + + /* Do a regular write and a raw write in cache only mode */ + regcache_cache_only(map, true); + KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, + sizeof(u16) * 2)); + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 4, val[2])); + + /* We should read back the new values, and defaults for the rest */ + for (i = 0; i < config.max_register + 1; i++) { + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval)); + + switch (i) { + case 2: + case 3: + if (config.val_format_endian == REGMAP_ENDIAN_BIG) { + KUNIT_EXPECT_EQ(test, rval, + be16_to_cpu((__force __be16)val[i - 2])); + } else { + KUNIT_EXPECT_EQ(test, rval, + le16_to_cpu((__force __le16)val[i - 2])); + } + break; + case 4: + KUNIT_EXPECT_EQ(test, rval, val[i - 2]); + break; + default: + KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval); + break; + } + } + + /* + * The value written via _write() was translated by the core, + * translate the original copy for comparison purposes. 
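[Editorial aside, not part of the patch] The regcache_cache_only()/regcache_mark_dirty()/regcache_sync() sequence that raw_sync drives here mirrors a common suspend/resume pattern in regmap users. A hedged sketch of that pattern; struct my_dev and the PM callbacks are hypothetical, only the regcache_* calls are the real API:

	struct my_dev { struct regmap *regmap; };	/* hypothetical driver data */

	static int my_dev_suspend(struct device *dev)
	{
		struct my_dev *priv = dev_get_drvdata(dev);

		/* Power is going away: queue further register writes in the cache only */
		regcache_cache_only(priv->regmap, true);
		return 0;
	}

	static int my_dev_resume(struct device *dev)
	{
		struct my_dev *priv = dev_get_drvdata(dev);

		regcache_cache_only(priv->regmap, false);
		/* The device lost its state, so write back everything we have cached */
		regcache_mark_dirty(priv->regmap);
		return regcache_sync(priv->regmap);
	}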
+ */ + if (config.val_format_endian == REGMAP_ENDIAN_BIG) + val[2] = cpu_to_be16(val[2]); + else + val[2] = cpu_to_le16(val[2]); + + /* The values should not appear in the "hardware" */ + KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], &val[0], sizeof(val)); + + for (i = 0; i < config.max_register + 1; i++) + data->written[i] = false; + + /* Do the sync */ + regcache_cache_only(map, false); + regcache_mark_dirty(map); + KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); + + /* The values should now appear in the "hardware" */ + KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], &val[0], sizeof(val)); +} + +static void raw_ranges(struct kunit *test) +{ + struct regmap *map; + struct regmap_config config; + struct regmap_ram_data *data; + unsigned int val; + int i; + + config = raw_regmap_config; + config.volatile_reg = test_range_all_volatile; + config.ranges = &test_range; + config.num_ranges = 1; + config.max_register = test_range.range_max; + + map = gen_raw_regmap(test, &config, &data); + KUNIT_ASSERT_FALSE(test, IS_ERR(map)); + if (IS_ERR(map)) + return; + + /* Reset the page to a non-zero value to trigger a change */ + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, test_range.selector_reg, + test_range.range_max)); + + /* Check we set the page and use the window for writes */ + data->written[test_range.selector_reg] = false; + data->written[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, test_range.range_min, 0)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->written[test_range.window_start]); + + data->written[test_range.selector_reg] = false; + data->written[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, + test_range.range_min + + test_range.window_len, + 0)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->written[test_range.window_start]); + + /* Same for reads */ + data->written[test_range.selector_reg] = false; + data->read[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, test_range.range_min, &val)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->read[test_range.window_start]); + + data->written[test_range.selector_reg] = false; + data->read[test_range.window_start] = false; + KUNIT_EXPECT_EQ(test, 0, regmap_read(map, + test_range.range_min + + test_range.window_len, + &val)); + KUNIT_EXPECT_TRUE(test, data->written[test_range.selector_reg]); + KUNIT_EXPECT_TRUE(test, data->read[test_range.window_start]); + + /* No physical access triggered in the virtual range */ + for (i = test_range.range_min; i < test_range.range_max; i++) { + KUNIT_EXPECT_FALSE(test, data->read[i]); + KUNIT_EXPECT_FALSE(test, data->written[i]); + } +} + +static struct kunit_case regmap_test_cases[] = { + KUNIT_CASE_PARAM(basic_read_write, regcache_types_gen_params), + KUNIT_CASE_PARAM(read_bypassed, real_cache_types_gen_params), + KUNIT_CASE_PARAM(read_bypassed_volatile, real_cache_types_gen_params), + KUNIT_CASE_PARAM(bulk_write, regcache_types_gen_params), + KUNIT_CASE_PARAM(bulk_read, regcache_types_gen_params), + KUNIT_CASE_PARAM(multi_write, regcache_types_gen_params), + KUNIT_CASE_PARAM(multi_read, regcache_types_gen_params), + KUNIT_CASE_PARAM(write_readonly, regcache_types_gen_params), + KUNIT_CASE_PARAM(read_writeonly, regcache_types_gen_params), + KUNIT_CASE_PARAM(reg_defaults, regcache_types_gen_params), + KUNIT_CASE_PARAM(reg_defaults_read_dev, regcache_types_gen_params), + 
KUNIT_CASE_PARAM(register_patch, regcache_types_gen_params), + KUNIT_CASE_PARAM(stride, regcache_types_gen_params), + KUNIT_CASE_PARAM(basic_ranges, regcache_types_gen_params), + KUNIT_CASE_PARAM(stress_insert, regcache_types_gen_params), + KUNIT_CASE_PARAM(cache_bypass, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_marked_dirty, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_after_cache_only, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_defaults_marked_dirty, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_default_after_cache_only, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_readonly, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_sync_patch, real_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_drop, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_drop_with_non_contiguous_ranges, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_drop_all_and_sync_marked_dirty, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_drop_all_and_sync_no_defaults, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_drop_all_and_sync_has_defaults, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_present, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_write_zero, sparse_cache_types_gen_params), + KUNIT_CASE_PARAM(cache_range_window_reg, real_cache_types_only_gen_params), + + KUNIT_CASE_PARAM(raw_read_defaults_single, raw_test_types_gen_params), + KUNIT_CASE_PARAM(raw_read_defaults, raw_test_types_gen_params), + KUNIT_CASE_PARAM(raw_write_read_single, raw_test_types_gen_params), + KUNIT_CASE_PARAM(raw_write, raw_test_types_gen_params), + KUNIT_CASE_PARAM(raw_noinc_write, raw_test_types_gen_params), + KUNIT_CASE_PARAM(raw_sync, raw_test_cache_types_gen_params), + KUNIT_CASE_PARAM(raw_ranges, raw_test_cache_types_gen_params), + {} +}; + +static int regmap_test_init(struct kunit *test) +{ + struct regmap_test_priv *priv; + struct device *dev; + + priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + test->priv = priv; + + dev = kunit_device_register(test, "regmap_test"); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + priv->dev = get_device(dev); + dev_set_drvdata(dev, test); + + return 0; +} + +static void regmap_test_exit(struct kunit *test) +{ + struct regmap_test_priv *priv = test->priv; + + /* Destroy the dummy struct device */ + if (priv && priv->dev) + put_device(priv->dev); +} + +static struct kunit_suite regmap_test_suite = { + .name = "regmap", + .init = regmap_test_init, + .exit = regmap_test_exit, + .test_cases = regmap_test_cases, +}; +kunit_test_suite(regmap_test_suite); + +MODULE_DESCRIPTION("Regmap KUnit tests"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c index 6a20201299f5..9573bf3b52f4 100644 --- a/drivers/base/regmap/regmap-mdio.c +++ b/drivers/base/regmap/regmap-mdio.c @@ -10,31 +10,21 @@ /* Clause-45 mask includes the device type (5 bit) and actual register number (16 bit) */ #define REGNUM_C45_MASK GENMASK(20, 0) -static int regmap_mdio_read(struct mdio_device *mdio_dev, u32 reg, unsigned int *val) +static int regmap_mdio_c22_read(void *context, unsigned int reg, unsigned int *val) { + struct mdio_device *mdio_dev = context; int ret; - ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg); + if (unlikely(reg & ~REGNUM_C22_MASK)) + return -ENXIO; + + ret = mdiodev_read(mdio_dev, reg); if (ret < 0) return ret; *val = ret & REGVAL_MASK; - return 0; -} - -static int 
regmap_mdio_write(struct mdio_device *mdio_dev, u32 reg, unsigned int val) -{ - return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val); -} - -static int regmap_mdio_c22_read(void *context, unsigned int reg, unsigned int *val) -{ - struct mdio_device *mdio_dev = context; - if (unlikely(reg & ~REGNUM_C22_MASK)) - return -ENXIO; - - return regmap_mdio_read(mdio_dev, reg, val); + return 0; } static int regmap_mdio_c22_write(void *context, unsigned int reg, unsigned int val) @@ -44,7 +34,7 @@ static int regmap_mdio_c22_write(void *context, unsigned int reg, unsigned int v if (unlikely(reg & ~REGNUM_C22_MASK)) return -ENXIO; - return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val); + return mdiodev_write(mdio_dev, reg, val); } static const struct regmap_bus regmap_mdio_c22_bus = { @@ -55,21 +45,36 @@ static const struct regmap_bus regmap_mdio_c22_bus = { static int regmap_mdio_c45_read(void *context, unsigned int reg, unsigned int *val) { struct mdio_device *mdio_dev = context; + unsigned int devad; + int ret; if (unlikely(reg & ~REGNUM_C45_MASK)) return -ENXIO; - return regmap_mdio_read(mdio_dev, MII_ADDR_C45 | reg, val); + devad = reg >> REGMAP_MDIO_C45_DEVAD_SHIFT; + reg = reg & REGMAP_MDIO_C45_REGNUM_MASK; + + ret = mdiodev_c45_read(mdio_dev, devad, reg); + if (ret < 0) + return ret; + + *val = ret & REGVAL_MASK; + + return 0; } static int regmap_mdio_c45_write(void *context, unsigned int reg, unsigned int val) { struct mdio_device *mdio_dev = context; + unsigned int devad; if (unlikely(reg & ~REGNUM_C45_MASK)) return -ENXIO; - return regmap_mdio_write(mdio_dev, MII_ADDR_C45 | reg, val); + devad = reg >> REGMAP_MDIO_C45_DEVAD_SHIFT; + reg = reg & REGMAP_MDIO_C45_REGNUM_MASK; + + return mdiodev_c45_write(mdio_dev, devad, reg, val); } static const struct regmap_bus regmap_mdio_c45_bus = { @@ -112,5 +117,5 @@ struct regmap *__devm_regmap_init_mdio(struct mdio_device *mdio_dev, EXPORT_SYMBOL_GPL(__devm_regmap_init_mdio); MODULE_AUTHOR("Sander Vanheule <sander@svanheule.net>"); -MODULE_DESCRIPTION("Regmap MDIO Module"); +MODULE_DESCRIPTION("regmap MDIO Module"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index f9cd51afb9d2..29e5f3175301 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -10,13 +10,14 @@ #include <linux/module.h> #include <linux/regmap.h> #include <linux/slab.h> +#include <linux/swab.h> #include "internal.h" struct regmap_mmio_context { void __iomem *regs; - unsigned val_bytes; - bool relaxed_mmio; + unsigned int val_bytes; + bool big_endian; bool attached_clk; struct clk *clk; @@ -33,9 +34,6 @@ static int regmap_mmio_regbits_check(size_t reg_bits) case 8: case 16: case 32: -#ifdef CONFIG_64BIT - case 64: -#endif return 0; default: return -EINVAL; @@ -50,18 +48,13 @@ static int regmap_mmio_get_min_stride(size_t val_bits) case 8: /* The core treats 0 as 1 */ min_stride = 0; - return 0; + break; case 16: min_stride = 2; break; case 32: min_stride = 4; break; -#ifdef CONFIG_64BIT - case 64: - min_stride = 8; - break; -#endif default: return -EINVAL; } @@ -83,6 +76,12 @@ static void regmap_mmio_write8_relaxed(struct regmap_mmio_context *ctx, writeb_relaxed(val, ctx->regs + reg); } +static void regmap_mmio_iowrite8(struct regmap_mmio_context *ctx, + unsigned int reg, unsigned int val) +{ + iowrite8(val, ctx->regs + reg); +} + static void regmap_mmio_write16le(struct regmap_mmio_context *ctx, unsigned int reg, unsigned int val) @@ -97,10 +96,22 @@ static void 
regmap_mmio_write16le_relaxed(struct regmap_mmio_context *ctx, writew_relaxed(val, ctx->regs + reg); } +static void regmap_mmio_iowrite16le(struct regmap_mmio_context *ctx, + unsigned int reg, unsigned int val) +{ + iowrite16(val, ctx->regs + reg); +} + static void regmap_mmio_write16be(struct regmap_mmio_context *ctx, unsigned int reg, unsigned int val) { + writew(swab16(val), ctx->regs + reg); +} + +static void regmap_mmio_iowrite16be(struct regmap_mmio_context *ctx, + unsigned int reg, unsigned int val) +{ iowrite16be(val, ctx->regs + reg); } @@ -118,28 +129,24 @@ static void regmap_mmio_write32le_relaxed(struct regmap_mmio_context *ctx, writel_relaxed(val, ctx->regs + reg); } -static void regmap_mmio_write32be(struct regmap_mmio_context *ctx, - unsigned int reg, - unsigned int val) +static void regmap_mmio_iowrite32le(struct regmap_mmio_context *ctx, + unsigned int reg, unsigned int val) { - iowrite32be(val, ctx->regs + reg); + iowrite32(val, ctx->regs + reg); } -#ifdef CONFIG_64BIT -static void regmap_mmio_write64le(struct regmap_mmio_context *ctx, +static void regmap_mmio_write32be(struct regmap_mmio_context *ctx, unsigned int reg, unsigned int val) { - writeq(val, ctx->regs + reg); + writel(swab32(val), ctx->regs + reg); } -static void regmap_mmio_write64le_relaxed(struct regmap_mmio_context *ctx, - unsigned int reg, - unsigned int val) +static void regmap_mmio_iowrite32be(struct regmap_mmio_context *ctx, + unsigned int reg, unsigned int val) { - writeq_relaxed(val, ctx->regs + reg); + iowrite32be(val, ctx->regs + reg); } -#endif static int regmap_mmio_write(void *context, unsigned int reg, unsigned int val) { @@ -160,6 +167,69 @@ static int regmap_mmio_write(void *context, unsigned int reg, unsigned int val) return 0; } +static int regmap_mmio_noinc_write(void *context, unsigned int reg, + const void *val, size_t val_count) +{ + struct regmap_mmio_context *ctx = context; + int ret = 0; + int i; + + if (!IS_ERR(ctx->clk)) { + ret = clk_enable(ctx->clk); + if (ret < 0) + return ret; + } + + /* + * There are no native, assembly-optimized write single register + * operations for big endian, so fall back to emulation if this + * is needed. (Single bytes are fine, they are not affected by + * endianness.) 
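[Editorial aside, not part of the patch] The new reg_noinc_write/reg_noinc_read handlers let MMIO regmaps service regmap_noinc_write()/regmap_noinc_read(), i.e. streaming a buffer to or from a single non-incrementing (FIFO-style) register. A rough sketch of the driver-side usage, assuming <linux/regmap.h>; MY_DEV_FIFO, my_dev_is_fifo_reg() and the config values are hypothetical:

	#define MY_DEV_FIFO	0x40	/* hypothetical FIFO data register */

	static bool my_dev_is_fifo_reg(struct device *dev, unsigned int reg)
	{
		return reg == MY_DEV_FIFO;
	}

	static const struct regmap_config my_dev_mmio_config = {
		.reg_bits = 32,
		.val_bits = 32,
		.reg_stride = 4,
		.writeable_noinc_reg = my_dev_is_fifo_reg,
		.readable_noinc_reg = my_dev_is_fifo_reg,
	};

	static int my_dev_push_samples(struct regmap *map, const u32 *buf, size_t count)
	{
		/* Every element lands in MY_DEV_FIFO; the address never increments */
		return regmap_noinc_write(map, MY_DEV_FIFO, buf, count * sizeof(*buf));
	}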
+ */ + if (ctx->big_endian && (ctx->val_bytes > 1)) { + switch (ctx->val_bytes) { + case 2: + { + const u16 *valp = (const u16 *)val; + for (i = 0; i < val_count; i++) + writew(swab16(valp[i]), ctx->regs + reg); + goto out_clk; + } + case 4: + { + const u32 *valp = (const u32 *)val; + for (i = 0; i < val_count; i++) + writel(swab32(valp[i]), ctx->regs + reg); + goto out_clk; + } + default: + ret = -EINVAL; + goto out_clk; + } + } + + switch (ctx->val_bytes) { + case 1: + writesb(ctx->regs + reg, (const u8 *)val, val_count); + break; + case 2: + writesw(ctx->regs + reg, (const u16 *)val, val_count); + break; + case 4: + writesl(ctx->regs + reg, (const u32 *)val, val_count); + break; + default: + ret = -EINVAL; + break; + } + +out_clk: + if (!IS_ERR(ctx->clk)) + clk_disable(ctx->clk); + + return ret; +} + static unsigned int regmap_mmio_read8(struct regmap_mmio_context *ctx, unsigned int reg) { @@ -172,6 +242,12 @@ static unsigned int regmap_mmio_read8_relaxed(struct regmap_mmio_context *ctx, return readb_relaxed(ctx->regs + reg); } +static unsigned int regmap_mmio_ioread8(struct regmap_mmio_context *ctx, + unsigned int reg) +{ + return ioread8(ctx->regs + reg); +} + static unsigned int regmap_mmio_read16le(struct regmap_mmio_context *ctx, unsigned int reg) { @@ -184,9 +260,21 @@ static unsigned int regmap_mmio_read16le_relaxed(struct regmap_mmio_context *ctx return readw_relaxed(ctx->regs + reg); } +static unsigned int regmap_mmio_ioread16le(struct regmap_mmio_context *ctx, + unsigned int reg) +{ + return ioread16(ctx->regs + reg); +} + static unsigned int regmap_mmio_read16be(struct regmap_mmio_context *ctx, unsigned int reg) { + return swab16(readw(ctx->regs + reg)); +} + +static unsigned int regmap_mmio_ioread16be(struct regmap_mmio_context *ctx, + unsigned int reg) +{ return ioread16be(ctx->regs + reg); } @@ -202,25 +290,23 @@ static unsigned int regmap_mmio_read32le_relaxed(struct regmap_mmio_context *ctx return readl_relaxed(ctx->regs + reg); } -static unsigned int regmap_mmio_read32be(struct regmap_mmio_context *ctx, - unsigned int reg) +static unsigned int regmap_mmio_ioread32le(struct regmap_mmio_context *ctx, + unsigned int reg) { - return ioread32be(ctx->regs + reg); + return ioread32(ctx->regs + reg); } -#ifdef CONFIG_64BIT -static unsigned int regmap_mmio_read64le(struct regmap_mmio_context *ctx, +static unsigned int regmap_mmio_read32be(struct regmap_mmio_context *ctx, unsigned int reg) { - return readq(ctx->regs + reg); + return swab32(readl(ctx->regs + reg)); } -static unsigned int regmap_mmio_read64le_relaxed(struct regmap_mmio_context *ctx, - unsigned int reg) +static unsigned int regmap_mmio_ioread32be(struct regmap_mmio_context *ctx, + unsigned int reg) { - return readq_relaxed(ctx->regs + reg); + return ioread32be(ctx->regs + reg); } -#endif static int regmap_mmio_read(void *context, unsigned int reg, unsigned int *val) { @@ -241,6 +327,61 @@ static int regmap_mmio_read(void *context, unsigned int reg, unsigned int *val) return 0; } +static int regmap_mmio_noinc_read(void *context, unsigned int reg, + void *val, size_t val_count) +{ + struct regmap_mmio_context *ctx = context; + int ret = 0; + + if (!IS_ERR(ctx->clk)) { + ret = clk_enable(ctx->clk); + if (ret < 0) + return ret; + } + + switch (ctx->val_bytes) { + case 1: + readsb(ctx->regs + reg, (u8 *)val, val_count); + break; + case 2: + readsw(ctx->regs + reg, (u16 *)val, val_count); + break; + case 4: + readsl(ctx->regs + reg, (u32 *)val, val_count); + break; + default: + ret = -EINVAL; + goto out_clk; + } + + /* + 
* There are no native, assembly-optimized write single register + * operations for big endian, so fall back to emulation if this + * is needed. (Single bytes are fine, they are not affected by + * endianness.) + */ + if (ctx->big_endian && (ctx->val_bytes > 1)) { + switch (ctx->val_bytes) { + case 2: + swab16_array(val, val_count); + break; + case 4: + swab32_array(val, val_count); + break; + default: + ret = -EINVAL; + break; + } + } + +out_clk: + if (!IS_ERR(ctx->clk)) + clk_disable(ctx->clk); + + return ret; +} + + static void regmap_mmio_free_context(void *context) { struct regmap_mmio_context *ctx = context; @@ -257,6 +398,8 @@ static const struct regmap_bus regmap_mmio = { .fast_io = true, .reg_write = regmap_mmio_write, .reg_read = regmap_mmio_read, + .reg_noinc_write = regmap_mmio_noinc_write, + .reg_noinc_read = regmap_mmio_noinc_read, .free_context = regmap_mmio_free_context, .val_format_endian_default = REGMAP_ENDIAN_LITTLE, }; @@ -281,7 +424,10 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, if (min_stride < 0) return ERR_PTR(min_stride); - if (config->reg_stride < min_stride) + if (config->reg_stride && config->reg_stride < min_stride) + return ERR_PTR(-EINVAL); + + if (config->use_relaxed_mmio && config->io_port) return ERR_PTR(-EINVAL); ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -290,7 +436,6 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, ctx->regs = regs; ctx->val_bytes = config->val_bits / 8; - ctx->relaxed_mmio = config->use_relaxed_mmio; ctx->clk = ERR_PTR(-ENODEV); switch (regmap_get_val_endian(dev, ®map_mmio, config)) { @@ -301,7 +446,10 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, #endif switch (config->val_bits) { case 8: - if (ctx->relaxed_mmio) { + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread8; + ctx->reg_write = regmap_mmio_iowrite8; + } else if (config->use_relaxed_mmio) { ctx->reg_read = regmap_mmio_read8_relaxed; ctx->reg_write = regmap_mmio_write8_relaxed; } else { @@ -310,7 +458,10 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, } break; case 16: - if (ctx->relaxed_mmio) { + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread16le; + ctx->reg_write = regmap_mmio_iowrite16le; + } else if (config->use_relaxed_mmio) { ctx->reg_read = regmap_mmio_read16le_relaxed; ctx->reg_write = regmap_mmio_write16le_relaxed; } else { @@ -319,7 +470,10 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, } break; case 32: - if (ctx->relaxed_mmio) { + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread32le; + ctx->reg_write = regmap_mmio_iowrite32le; + } else if (config->use_relaxed_mmio) { ctx->reg_read = regmap_mmio_read32le_relaxed; ctx->reg_write = regmap_mmio_write32le_relaxed; } else { @@ -327,17 +481,6 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, ctx->reg_write = regmap_mmio_write32le; } break; -#ifdef CONFIG_64BIT - case 64: - if (ctx->relaxed_mmio) { - ctx->reg_read = regmap_mmio_read64le_relaxed; - ctx->reg_write = regmap_mmio_write64le_relaxed; - } else { - ctx->reg_read = regmap_mmio_read64le; - ctx->reg_write = regmap_mmio_write64le; - } - break; -#endif default: ret = -EINVAL; goto err_free; @@ -347,18 +490,34 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, #ifdef __BIG_ENDIAN case REGMAP_ENDIAN_NATIVE: #endif + ctx->big_endian = true; switch (config->val_bits) { case 8: - ctx->reg_read = 
regmap_mmio_read8; - ctx->reg_write = regmap_mmio_write8; + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread8; + ctx->reg_write = regmap_mmio_iowrite8; + } else { + ctx->reg_read = regmap_mmio_read8; + ctx->reg_write = regmap_mmio_write8; + } break; case 16: - ctx->reg_read = regmap_mmio_read16be; - ctx->reg_write = regmap_mmio_write16be; + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread16be; + ctx->reg_write = regmap_mmio_iowrite16be; + } else { + ctx->reg_read = regmap_mmio_read16be; + ctx->reg_write = regmap_mmio_write16be; + } break; case 32: - ctx->reg_read = regmap_mmio_read32be; - ctx->reg_write = regmap_mmio_write32be; + if (config->io_port) { + ctx->reg_read = regmap_mmio_ioread32be; + ctx->reg_write = regmap_mmio_iowrite32be; + } else { + ctx->reg_read = regmap_mmio_read32be; + ctx->reg_write = regmap_mmio_write32be; + } break; default: ret = -EINVAL; @@ -450,4 +609,5 @@ void regmap_mmio_detach_clk(struct regmap *map) } EXPORT_SYMBOL_GPL(regmap_mmio_detach_clk); +MODULE_DESCRIPTION("regmap MMIO Module"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-ram.c b/drivers/base/regmap/regmap-ram.c new file mode 100644 index 000000000000..4e5b4518ce4d --- /dev/null +++ b/drivers/base/regmap/regmap-ram.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Register map access API - Memory region +// +// This is intended for testing only +// +// Copyright (c) 2023, Arm Ltd + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/swab.h> + +#include "internal.h" + +static int regmap_ram_write(void *context, unsigned int reg, unsigned int val) +{ + struct regmap_ram_data *data = context; + + data->vals[reg] = val; + data->written[reg] = true; + + return 0; +} + +static int regmap_ram_read(void *context, unsigned int reg, unsigned int *val) +{ + struct regmap_ram_data *data = context; + + *val = data->vals[reg]; + data->read[reg] = true; + + return 0; +} + +static void regmap_ram_free_context(void *context) +{ + struct regmap_ram_data *data = context; + + kfree(data->vals); + kfree(data->read); + kfree(data->written); + kfree(data); +} + +static const struct regmap_bus regmap_ram = { + .fast_io = true, + .reg_write = regmap_ram_write, + .reg_read = regmap_ram_read, + .free_context = regmap_ram_free_context, +}; + +struct regmap *__regmap_init_ram(struct device *dev, + const struct regmap_config *config, + struct regmap_ram_data *data, + struct lock_class_key *lock_key, + const char *lock_name) +{ + struct regmap *map; + + if (!config->max_register) { + pr_crit("No max_register specified for RAM regmap\n"); + return ERR_PTR(-EINVAL); + } + + data->read = kcalloc(config->max_register + 1, sizeof(bool), + GFP_KERNEL); + if (!data->read) + return ERR_PTR(-ENOMEM); + + data->written = kcalloc(config->max_register + 1, sizeof(bool), + GFP_KERNEL); + if (!data->written) + return ERR_PTR(-ENOMEM); + + map = __regmap_init(dev, ®map_ram, data, config, + lock_key, lock_name); + + return map; +} +EXPORT_SYMBOL_GPL(__regmap_init_ram); + +MODULE_DESCRIPTION("Register map access API - Memory region"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-raw-ram.c b/drivers/base/regmap/regmap-raw-ram.c new file mode 100644 index 000000000000..76c98814fb8a --- /dev/null +++ b/drivers/base/regmap/regmap-raw-ram.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Register map access API - Memory region with raw access +// 
+// This is intended for testing only +// +// Copyright (c) 2023, Arm Ltd + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/swab.h> + +#include "internal.h" + +static unsigned int decode_reg(enum regmap_endian endian, const void *reg) +{ + const u16 *r = reg; + + if (endian == REGMAP_ENDIAN_BIG) + return be16_to_cpu(*r); + else + return le16_to_cpu(*r); +} + +static int regmap_raw_ram_gather_write(void *context, + const void *reg, size_t reg_len, + const void *val, size_t val_len) +{ + struct regmap_ram_data *data = context; + unsigned int r; + u16 *our_buf = (u16 *)data->vals; + int i; + + if (reg_len != 2) + return -EINVAL; + if (val_len % 2) + return -EINVAL; + + r = decode_reg(data->reg_endian, reg); + if (data->noinc_reg && data->noinc_reg(data, r)) { + memcpy(&our_buf[r], val + val_len - 2, 2); + data->written[r] = true; + } else { + memcpy(&our_buf[r], val, val_len); + + for (i = 0; i < val_len / 2; i++) + data->written[r + i] = true; + } + + return 0; +} + +static int regmap_raw_ram_write(void *context, const void *data, size_t count) +{ + return regmap_raw_ram_gather_write(context, data, 2, + data + 2, count - 2); +} + +static int regmap_raw_ram_read(void *context, + const void *reg, size_t reg_len, + void *val, size_t val_len) +{ + struct regmap_ram_data *data = context; + unsigned int r; + u16 *our_buf = (u16 *)data->vals; + int i; + + if (reg_len != 2) + return -EINVAL; + if (val_len % 2) + return -EINVAL; + + r = decode_reg(data->reg_endian, reg); + if (data->noinc_reg && data->noinc_reg(data, r)) { + for (i = 0; i < val_len; i += 2) + memcpy(val + i, &our_buf[r], 2); + data->read[r] = true; + } else { + memcpy(val, &our_buf[r], val_len); + + for (i = 0; i < val_len / 2; i++) + data->read[r + i] = true; + } + + return 0; +} + +static void regmap_raw_ram_free_context(void *context) +{ + struct regmap_ram_data *data = context; + + kfree(data->vals); + kfree(data->read); + kfree(data->written); + kfree(data); +} + +static const struct regmap_bus regmap_raw_ram = { + .fast_io = true, + .write = regmap_raw_ram_write, + .gather_write = regmap_raw_ram_gather_write, + .read = regmap_raw_ram_read, + .free_context = regmap_raw_ram_free_context, +}; + +struct regmap *__regmap_init_raw_ram(struct device *dev, + const struct regmap_config *config, + struct regmap_ram_data *data, + struct lock_class_key *lock_key, + const char *lock_name) +{ + struct regmap *map; + + if (config->reg_bits != 16) + return ERR_PTR(-EINVAL); + + if (!config->max_register) { + pr_crit("No max_register specified for RAM regmap\n"); + return ERR_PTR(-EINVAL); + } + + data->read = kcalloc(config->max_register + 1, sizeof(bool), + GFP_KERNEL); + if (!data->read) + return ERR_PTR(-ENOMEM); + + data->written = kcalloc(config->max_register + 1, sizeof(bool), + GFP_KERNEL); + if (!data->written) + return ERR_PTR(-ENOMEM); + + data->reg_endian = config->reg_format_endian; + + map = __regmap_init(dev, ®map_raw_ram, data, config, + lock_key, lock_name); + + return map; +} +EXPORT_SYMBOL_GPL(__regmap_init_raw_ram); + +MODULE_DESCRIPTION("Register map access API - Memory region with raw access"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-sccb.c b/drivers/base/regmap/regmap-sccb.c index 597042e2d009..12bbbb03e5f2 100644 --- a/drivers/base/regmap/regmap-sccb.c +++ b/drivers/base/regmap/regmap-sccb.c @@ -80,7 +80,7 @@ static int regmap_sccb_write(void *context, unsigned int reg, 
unsigned int val) return i2c_smbus_write_byte_data(i2c, reg, val); } -static struct regmap_bus regmap_sccb_bus = { +static const struct regmap_bus regmap_sccb_bus = { .reg_write = regmap_sccb_write, .reg_read = regmap_sccb_read, }; @@ -125,4 +125,5 @@ struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c, } EXPORT_SYMBOL_GPL(__devm_regmap_init_sccb); +MODULE_DESCRIPTION("Register map access API - SCCB support"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c index fe3ac26b66ad..6a61629f5f89 100644 --- a/drivers/base/regmap/regmap-sdw-mbq.c +++ b/drivers/base/regmap/regmap-sdw-mbq.c @@ -1,48 +1,190 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright(c) 2020 Intel Corporation. +#include <linux/bits.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/errno.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/regmap.h> #include <linux/soundwire/sdw.h> #include <linux/soundwire/sdw_registers.h> +#include <sound/sdca_function.h> #include "internal.h" +struct regmap_mbq_context { + struct device *dev; + struct sdw_slave *sdw; + + bool (*readable_reg)(struct device *dev, unsigned int reg); + + struct regmap_sdw_mbq_cfg cfg; + + int val_size; +}; + +static int regmap_sdw_mbq_size(struct regmap_mbq_context *ctx, unsigned int reg) +{ + int size = ctx->val_size; + + if (ctx->cfg.mbq_size) { + size = ctx->cfg.mbq_size(ctx->dev, reg); + if (!size || size > ctx->val_size) + return -EINVAL; + } + + return size; +} + +static bool regmap_sdw_mbq_deferrable(struct regmap_mbq_context *ctx, unsigned int reg) +{ + if (ctx->cfg.deferrable) + return ctx->cfg.deferrable(ctx->dev, reg); + + return false; +} + +static int regmap_sdw_mbq_poll_busy(struct sdw_slave *slave, unsigned int reg, + struct regmap_mbq_context *ctx) +{ + struct device *dev = ctx->dev; + int val, ret = 0; + + dev_dbg(dev, "Deferring transaction for 0x%x\n", reg); + + reg = SDW_SDCA_CTL(SDW_SDCA_CTL_FUNC(reg), 0, + SDCA_CTL_ENTITY_0_FUNCTION_STATUS, 0); + + if (ctx->readable_reg(dev, reg)) { + ret = read_poll_timeout(sdw_read_no_pm, val, + val < 0 || !(val & SDCA_CTL_ENTITY_0_FUNCTION_BUSY), + ctx->cfg.timeout_us, ctx->cfg.retry_us, + false, slave, reg); + if (val < 0) + return val; + if (ret) + dev_err(dev, "Function busy timed out 0x%x: %d\n", reg, val); + } else { + fsleep(ctx->cfg.timeout_us); + } + + return ret; +} + +static int regmap_sdw_mbq_write_impl(struct sdw_slave *slave, + unsigned int reg, unsigned int val, + int mbq_size, bool deferrable) +{ + int shift = mbq_size * BITS_PER_BYTE; + int ret; + + while (--mbq_size > 0) { + shift -= BITS_PER_BYTE; + + ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), + (val >> shift) & 0xff); + if (ret < 0) + return ret; + } + + ret = sdw_write_no_pm(slave, reg, val & 0xff); + if (deferrable && ret == -ENODATA) + return -EAGAIN; + + return ret; +} + static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int val) { - struct device *dev = context; - struct sdw_slave *slave = dev_to_sdw_dev(dev); + struct regmap_mbq_context *ctx = context; + struct sdw_slave *slave = ctx->sdw; + bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); + int mbq_size = regmap_sdw_mbq_size(ctx, reg); int ret; - ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff); - if (ret < 0) - return ret; + if (mbq_size < 0) + return mbq_size; + + /* + * Technically the spec does allow a device to set itself to busy for + * internal reasons, but since it doesn't provide any information 
on + * how to handle timeouts in that case, for now the code will only + * process a single wait/timeout on function busy and a single retry + * of the transaction. + */ + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, deferrable); + if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); + if (ret) + return ret; + + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, false); + } - return sdw_write_no_pm(slave, reg, val & 0xff); + return ret; } -static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val) +static int regmap_sdw_mbq_read_impl(struct sdw_slave *slave, + unsigned int reg, unsigned int *val, + int mbq_size, bool deferrable) { - struct device *dev = context; - struct sdw_slave *slave = dev_to_sdw_dev(dev); - int read0; - int read1; + int shift = BITS_PER_BYTE; + int read; + + read = sdw_read_no_pm(slave, reg); + if (read < 0) { + if (deferrable && read == -ENODATA) + return -EAGAIN; + + return read; + } - read0 = sdw_read_no_pm(slave, reg); - if (read0 < 0) - return read0; + *val = read; - read1 = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg)); - if (read1 < 0) - return read1; + while (--mbq_size > 0) { + read = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg)); + if (read < 0) + return read; - *val = (read1 << 8) | read0; + *val |= read << shift; + shift += BITS_PER_BYTE; + } return 0; } -static struct regmap_bus regmap_sdw_mbq = { +static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val) +{ + struct regmap_mbq_context *ctx = context; + struct sdw_slave *slave = ctx->sdw; + bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); + int mbq_size = regmap_sdw_mbq_size(ctx, reg); + int ret; + + if (mbq_size < 0) + return mbq_size; + + /* + * Technically the spec does allow a device to set itself to busy for + * internal reasons, but since it doesn't provide any information on + * how to handle timeouts in that case, for now the code will only + * process a single wait/timeout on function busy and a single retry + * of the transaction. 
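[Editorial aside, not part of the patch] After this rework a SoundWire/SDCA codec driver still creates its MBQ regmap through the usual helper; the optional struct regmap_sdw_mbq_cfg (per-register MBQ size, deferrable controls, busy-poll timeouts) is the new part. A hedged sketch of the unchanged basic usage; the config values and probe body are hypothetical:

	static const struct regmap_config my_sdca_regmap_config = {
		.reg_bits = 32,		/* SDW_SDCA_CTL()-style addresses */
		.val_bits = 16,		/* two-byte MBQ controls */
		.cache_type = REGCACHE_NONE,
	};

	static int my_sdca_probe(struct sdw_slave *slave, const struct sdw_device_id *id)
	{
		struct regmap *regmap;

		regmap = devm_regmap_init_sdw_mbq(slave, &my_sdca_regmap_config);
		if (IS_ERR(regmap))
			return PTR_ERR(regmap);

		/* ...plain regmap_read()/regmap_write() from here on... */
		return 0;
	}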
+ */ + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, deferrable); + if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); + if (ret) + return ret; + + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, false); + } + + return ret; +} + +static const struct regmap_bus regmap_sdw_mbq = { .reg_read = regmap_sdw_mbq_read, .reg_write = regmap_sdw_mbq_write, .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, @@ -51,8 +193,7 @@ static struct regmap_bus regmap_sdw_mbq = { static int regmap_sdw_mbq_config_check(const struct regmap_config *config) { - /* MBQ-based controls are only 16-bits for now */ - if (config->val_bits != 16) + if (config->val_bits > (sizeof(unsigned int) * BITS_PER_BYTE)) return -ENOTSUPP; /* Registers are 32 bits wide */ @@ -65,37 +206,73 @@ static int regmap_sdw_mbq_config_check(const struct regmap_config *config) return 0; } -struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, +static struct regmap_mbq_context * +regmap_sdw_mbq_gen_context(struct device *dev, + struct sdw_slave *sdw, + const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config) +{ + struct regmap_mbq_context *ctx; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->dev = dev; + ctx->sdw = sdw; + + if (mbq_config) + ctx->cfg = *mbq_config; + + ctx->val_size = config->val_bits / BITS_PER_BYTE; + ctx->readable_reg = config->readable_reg; + + return ctx; +} + +struct regmap *__regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name) { + struct regmap_mbq_context *ctx; int ret; ret = regmap_sdw_mbq_config_check(config); if (ret) return ERR_PTR(ret); - return __regmap_init(&sdw->dev, ®map_sdw_mbq, - &sdw->dev, config, lock_key, lock_name); + ctx = regmap_sdw_mbq_gen_context(dev, sdw, config, mbq_config); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + return __regmap_init(dev, ®map_sdw_mbq, ctx, + config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_sdw_mbq); -struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, +struct regmap *__devm_regmap_init_sdw_mbq(struct device *dev, struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name) { + struct regmap_mbq_context *ctx; int ret; ret = regmap_sdw_mbq_config_check(config); if (ret) return ERR_PTR(ret); - return __devm_regmap_init(&sdw->dev, ®map_sdw_mbq, - &sdw->dev, config, lock_key, lock_name); + ctx = regmap_sdw_mbq_gen_context(dev, sdw, config, mbq_config); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + return __devm_regmap_init(dev, ®map_sdw_mbq, ctx, + config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw_mbq); -MODULE_DESCRIPTION("Regmap SoundWire MBQ Module"); +MODULE_DESCRIPTION("regmap SoundWire MBQ Module"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c index 966de8a136d9..ea631ac7c7ec 100644 --- a/drivers/base/regmap/regmap-sdw.c +++ b/drivers/base/regmap/regmap-sdw.c @@ -6,50 +6,63 @@ #include <linux/module.h> #include <linux/regmap.h> #include <linux/soundwire/sdw.h> +#include <linux/types.h> #include "internal.h" -static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val) +static int regmap_sdw_write(void *context, const void *val_buf, size_t val_size) { struct device *dev = 
context; struct sdw_slave *slave = dev_to_sdw_dev(dev); + /* First word of buffer contains the destination address */ + u32 addr = le32_to_cpu(*(const __le32 *)val_buf); + const u8 *val = val_buf; - return sdw_write_no_pm(slave, reg, val); + return sdw_nwrite_no_pm(slave, addr, val_size - sizeof(addr), val + sizeof(addr)); } -static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) +static int regmap_sdw_gather_write(void *context, + const void *reg_buf, size_t reg_size, + const void *val_buf, size_t val_size) { struct device *dev = context; struct sdw_slave *slave = dev_to_sdw_dev(dev); - int read; + u32 addr = le32_to_cpu(*(const __le32 *)reg_buf); - read = sdw_read_no_pm(slave, reg); - if (read < 0) - return read; + return sdw_nwrite_no_pm(slave, addr, val_size, val_buf); +} - *val = read; - return 0; +static int regmap_sdw_read(void *context, + const void *reg_buf, size_t reg_size, + void *val_buf, size_t val_size) +{ + struct device *dev = context; + struct sdw_slave *slave = dev_to_sdw_dev(dev); + u32 addr = le32_to_cpu(*(const __le32 *)reg_buf); + + return sdw_nread_no_pm(slave, addr, val_size, val_buf); } -static struct regmap_bus regmap_sdw = { - .reg_read = regmap_sdw_read, - .reg_write = regmap_sdw_write, +static const struct regmap_bus regmap_sdw = { + .write = regmap_sdw_write, + .gather_write = regmap_sdw_gather_write, + .read = regmap_sdw_read, .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, .val_format_endian_default = REGMAP_ENDIAN_LITTLE, }; static int regmap_sdw_config_check(const struct regmap_config *config) { - /* All register are 8-bits wide as per MIPI Soundwire 1.0 Spec */ - if (config->val_bits != 8) - return -ENOTSUPP; - - /* Registers are 32 bits wide */ + /* Register addresses are 32 bits wide */ if (config->reg_bits != 32) return -ENOTSUPP; if (config->pad_bits != 0) return -ENOTSUPP; + /* Only bulk writes are supported not multi-register writes */ + if (config->can_multi_write) + return -ENOTSUPP; + return 0; } @@ -85,5 +98,5 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, } EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw); -MODULE_DESCRIPTION("Regmap SoundWire Module"); +MODULE_DESCRIPTION("regmap SoundWire Module"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c index 0968059f1ef5..e523fae73004 100644 --- a/drivers/base/regmap/regmap-slimbus.c +++ b/drivers/base/regmap/regmap-slimbus.c @@ -22,7 +22,7 @@ static int regmap_slimbus_read(void *context, const void *reg, size_t reg_size, return slim_read(sdev, *(u16 *)reg, val_size, val); } -static struct regmap_bus regmap_slimbus_bus = { +static const struct regmap_bus regmap_slimbus_bus = { .write = regmap_slimbus_write, .read = regmap_slimbus_read, .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, @@ -48,8 +48,7 @@ struct regmap *__regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __regmap_init(&slimbus->dev, bus, &slimbus->dev, config, - lock_key, lock_name); + return __regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_slimbus); @@ -63,9 +62,9 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __devm_regmap_init(&slimbus->dev, bus, &slimbus, config, - lock_key, lock_name); + return __devm_regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_slimbus); +MODULE_DESCRIPTION("Register 
map access API - SLIMbus support"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c index ad1da83e849f..d86a06cadcdb 100644 --- a/drivers/base/regmap/regmap-spi-avmm.c +++ b/drivers/base/regmap/regmap-spi-avmm.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/regmap.h> #include <linux/spi/spi.h> +#include <linux/swab.h> /* * This driver implements the regmap operations for a generic SPI @@ -162,19 +163,12 @@ struct spi_avmm_bridge { /* bridge buffer used in translation between protocol layers */ char trans_buf[TRANS_BUF_SIZE]; char phy_buf[PHY_BUF_SIZE]; - void (*swap_words)(char *buf, unsigned int len); + void (*swap_words)(void *buf, unsigned int len); }; -static void br_swap_words_32(char *buf, unsigned int len) +static void br_swap_words_32(void *buf, unsigned int len) { - u32 *p = (u32 *)buf; - unsigned int count; - - count = len / 4; - while (count--) { - *p = swab32p(p); - p++; - } + swab32_array(buf, len / 4); } /* @@ -716,4 +710,5 @@ struct regmap *__devm_regmap_init_spi_avmm(struct spi_device *spi, } EXPORT_SYMBOL_GPL(__devm_regmap_init_spi_avmm); +MODULE_DESCRIPTION("Register map access API - SPI AVMM support"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index c1894e93c378..14b1d88997cb 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c @@ -109,13 +109,44 @@ static const struct regmap_bus regmap_spi = { .val_format_endian_default = REGMAP_ENDIAN_BIG, }; +static const struct regmap_bus *regmap_get_spi_bus(struct spi_device *spi, + const struct regmap_config *config) +{ + size_t max_size = spi_max_transfer_size(spi); + size_t max_msg_size, reg_reserve_size; + struct regmap_bus *bus; + + if (max_size != SIZE_MAX) { + bus = kmemdup(&regmap_spi, sizeof(*bus), GFP_KERNEL); + if (!bus) + return ERR_PTR(-ENOMEM); + + max_msg_size = spi_max_message_size(spi); + reg_reserve_size = (config->reg_bits + config->pad_bits) / BITS_PER_BYTE; + if (max_size + reg_reserve_size > max_msg_size) + max_size -= reg_reserve_size; + + bus->free_on_exit = true; + bus->max_raw_read = max_size; + bus->max_raw_write = max_size; + + return bus; + } + + return &regmap_spi; +} + struct regmap *__regmap_init_spi(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name) { - return __regmap_init(&spi->dev, &regmap_spi, &spi->dev, config, - lock_key, lock_name); + const struct regmap_bus *bus = regmap_get_spi_bus(spi, config); + + if (IS_ERR(bus)) + return ERR_CAST(bus); + + return __regmap_init(&spi->dev, bus, &spi->dev, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_spi); @@ -124,9 +155,14 @@ struct regmap *__devm_regmap_init_spi(struct spi_device *spi, struct lock_class_key *lock_key, const char *lock_name) { - return __devm_regmap_init(&spi->dev, &regmap_spi, &spi->dev, config, - lock_key, lock_name); + const struct regmap_bus *bus = regmap_get_spi_bus(spi, config); + + if (IS_ERR(bus)) + return ERR_CAST(bus); + + return __devm_regmap_init(&spi->dev, bus, &spi->dev, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_spi); +MODULE_DESCRIPTION("regmap SPI Module"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c index cdf12d2aa3a1..347bfe9544ce 100644 --- a/drivers/base/regmap/regmap-spmi.c +++ b/drivers/base/regmap/regmap-spmi.c @@ -222,4 +222,5 @@ struct regmap *__devm_regmap_init_spmi_ext(struct
spmi_device *sdev, } EXPORT_SYMBOL_GPL(__devm_regmap_init_spmi_ext); +MODULE_DESCRIPTION("Register map access API - SPMI support"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-w1.c b/drivers/base/regmap/regmap-w1.c index 1fbaaad71ca5..29fd24f9c7ed 100644 --- a/drivers/base/regmap/regmap-w1.c +++ b/drivers/base/regmap/regmap-w1.c @@ -172,17 +172,17 @@ static int w1_reg_a16_v16_write(void *context, unsigned int reg, * Various types of supported bus addressing */ -static struct regmap_bus regmap_w1_bus_a8_v8 = { +static const struct regmap_bus regmap_w1_bus_a8_v8 = { .reg_read = w1_reg_a8_v8_read, .reg_write = w1_reg_a8_v8_write, }; -static struct regmap_bus regmap_w1_bus_a8_v16 = { +static const struct regmap_bus regmap_w1_bus_a8_v16 = { .reg_read = w1_reg_a8_v16_read, .reg_write = w1_reg_a8_v16_write, }; -static struct regmap_bus regmap_w1_bus_a16_v16 = { +static const struct regmap_bus regmap_w1_bus_a16_v16 = { .reg_read = w1_reg_a16_v16_read, .reg_write = w1_reg_a16_v16_write, }; @@ -234,4 +234,5 @@ struct regmap *__devm_regmap_init_w1(struct device *w1_dev, } EXPORT_SYMBOL_GPL(__devm_regmap_init_w1); +MODULE_DESCRIPTION("Register map access API - W1 (1-Wire) support"); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index fe3e38dd5324..ce9be3989a21 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -17,7 +17,7 @@ #include <linux/delay.h> #include <linux/log2.h> #include <linux/hwspinlock.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(regmap_check_range_table); bool regmap_writeable(struct regmap *map, unsigned int reg) { - if (map->max_register && reg > map->max_register) + if (map->max_register_is_set && reg > map->max_register) return false; if (map->writeable_reg) @@ -112,7 +112,7 @@ bool regmap_cached(struct regmap *map, unsigned int reg) if (!map->cache_ops) return false; - if (map->max_register && reg > map->max_register) + if (map->max_register_is_set && reg > map->max_register) return false; map->lock(map->lock_arg); @@ -129,7 +129,7 @@ bool regmap_readable(struct regmap *map, unsigned int reg) if (!map->reg_read) return false; - if (map->max_register && reg > map->max_register) + if (map->max_register_is_set && reg > map->max_register) return false; if (map->format.format_write) @@ -288,15 +288,9 @@ static void regmap_format_16_native(void *buf, unsigned int val, memcpy(buf, &v, sizeof(v)); } -static void regmap_format_24(void *buf, unsigned int val, unsigned int shift) +static void regmap_format_24_be(void *buf, unsigned int val, unsigned int shift) { - u8 *b = buf; - - val <<= shift; - - b[0] = val >> 16; - b[1] = val >> 8; - b[2] = val; + put_unaligned_be24(val << shift, buf); } static void regmap_format_32_be(void *buf, unsigned int val, unsigned int shift) @@ -317,26 +311,6 @@ static void regmap_format_32_native(void *buf, unsigned int val, memcpy(buf, &v, sizeof(v)); } -#ifdef CONFIG_64BIT -static void regmap_format_64_be(void *buf, unsigned int val, unsigned int shift) -{ - put_unaligned_be64((u64) val << shift, buf); -} - -static void regmap_format_64_le(void *buf, unsigned int val, unsigned int shift) -{ - put_unaligned_le64((u64) val << shift, buf); -} - -static void regmap_format_64_native(void *buf, unsigned int val, - unsigned int shift) -{ - u64 v = (u64) val << shift; - - memcpy(buf, &v, sizeof(v)); -} -#endif - static void regmap_parse_inplace_noop(void *buf) { } @@ -380,14 
+354,9 @@ static unsigned int regmap_parse_16_native(const void *buf) return v; } -static unsigned int regmap_parse_24(const void *buf) +static unsigned int regmap_parse_24_be(const void *buf) { - const u8 *b = buf; - unsigned int ret = b[2]; - ret |= ((unsigned int)b[1]) << 8; - ret |= ((unsigned int)b[0]) << 16; - - return ret; + return get_unaligned_be24(buf); } static unsigned int regmap_parse_32_be(const void *buf) @@ -422,40 +391,6 @@ static unsigned int regmap_parse_32_native(const void *buf) return v; } -#ifdef CONFIG_64BIT -static unsigned int regmap_parse_64_be(const void *buf) -{ - return get_unaligned_be64(buf); -} - -static unsigned int regmap_parse_64_le(const void *buf) -{ - return get_unaligned_le64(buf); -} - -static void regmap_parse_64_be_inplace(void *buf) -{ - u64 v = get_unaligned_be64(buf); - - memcpy(buf, &v, sizeof(v)); -} - -static void regmap_parse_64_le_inplace(void *buf) -{ - u64 v = get_unaligned_le64(buf); - - memcpy(buf, &v, sizeof(v)); -} - -static unsigned int regmap_parse_64_native(const void *buf) -{ - u64 v; - - memcpy(&v, buf, sizeof(v)); - return v; -} -#endif - static void regmap_lock_hwlock(void *__map) { struct regmap *map = __map; @@ -533,6 +468,23 @@ __releases(&map->spinlock) spin_unlock_irqrestore(&map->spinlock, map->spinlock_flags); } +static void regmap_lock_raw_spinlock(void *__map) +__acquires(&map->raw_spinlock) +{ + struct regmap *map = __map; + unsigned long flags; + + raw_spin_lock_irqsave(&map->raw_spinlock, flags); + map->raw_spinlock_flags = flags; +} + +static void regmap_unlock_raw_spinlock(void *__map) +__releases(&map->raw_spinlock) +{ + struct regmap *map = __map; + raw_spin_unlock_irqrestore(&map->raw_spinlock, map->raw_spinlock_flags); +} + static void dev_get_regmap_release(struct device *dev, void *res) { /* @@ -630,6 +582,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, if (ret) return ret; + regmap_debugfs_exit(map); regmap_debugfs_init(map); /* Add a devres resource for dev_get_regmap() */ @@ -645,6 +598,17 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, } EXPORT_SYMBOL_GPL(regmap_attach_dev); +static int dev_get_regmap_match(struct device *dev, void *res, void *data); + +static int regmap_detach_dev(struct device *dev, struct regmap *map) +{ + if (!dev) + return 0; + + return devres_release(dev, dev_get_regmap_release, + dev_get_regmap_match, (void *)map->name); +} + static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus, const struct regmap_config *config) { @@ -770,11 +734,19 @@ struct regmap *__regmap_init(struct device *dev, } else { if ((bus && bus->fast_io) || config->fast_io) { - spin_lock_init(&map->spinlock); - map->lock = regmap_lock_spinlock; - map->unlock = regmap_unlock_spinlock; - lockdep_set_class_and_name(&map->spinlock, - lock_key, lock_name); + if (config->use_raw_spinlock) { + raw_spin_lock_init(&map->raw_spinlock); + map->lock = regmap_lock_raw_spinlock; + map->unlock = regmap_unlock_raw_spinlock; + lockdep_set_class_and_name(&map->raw_spinlock, + lock_key, lock_name); + } else { + spin_lock_init(&map->spinlock); + map->lock = regmap_lock_spinlock; + map->unlock = regmap_unlock_spinlock; + lockdep_set_class_and_name(&map->spinlock, + lock_key, lock_name); + } } else { mutex_init(&map->mutex); map->lock = regmap_lock_mutex; @@ -784,6 +756,7 @@ struct regmap *__regmap_init(struct device *dev, lock_key, lock_name); } map->lock_arg = map; + map->lock_key = lock_key; } /* @@ -795,12 +768,14 @@ struct regmap *__regmap_init(struct device *dev, else 
map->alloc_flags = GFP_KERNEL; - map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8); - map->format.pad_bytes = config->pad_bits / 8; - map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8); - map->format.buf_size = DIV_ROUND_UP(config->reg_bits + - config->val_bits + config->pad_bits, 8); + map->reg_base = config->reg_base; map->reg_shift = config->pad_bits % 8; + + map->format.pad_bytes = config->pad_bits / 8; + map->format.reg_shift = config->reg_shift; + map->format.reg_bytes = BITS_TO_BYTES(config->reg_bits); + map->format.val_bytes = BITS_TO_BYTES(config->val_bits); + map->format.buf_size = BITS_TO_BYTES(config->reg_bits + config->val_bits + config->pad_bits); if (config->reg_stride) map->reg_stride = config->reg_stride; else @@ -809,17 +784,21 @@ struct regmap *__regmap_init(struct device *dev, map->reg_stride_order = ilog2(map->reg_stride); else map->reg_stride_order = -1; - map->use_single_read = config->use_single_read || !bus || !bus->read; - map->use_single_write = config->use_single_write || !bus || !bus->write; - map->can_multi_write = config->can_multi_write && bus && bus->write; + map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read)); + map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write)); + map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write)); if (bus) { map->max_raw_read = bus->max_raw_read; map->max_raw_write = bus->max_raw_write; + } else if (config->max_raw_read && config->max_raw_write) { + map->max_raw_read = config->max_raw_read; + map->max_raw_write = config->max_raw_write; } map->dev = dev; map->bus = bus; map->bus_context = bus_context; map->max_register = config->max_register; + map->max_register_is_set = map->max_register ?: config->max_register_is_0; map->wr_table = config->wr_table; map->rd_table = config->rd_table; map->volatile_table = config->volatile_table; @@ -848,9 +827,21 @@ struct regmap *__regmap_init(struct device *dev, map->read_flag_mask = bus->read_flag_mask; } - if (!bus) { + if (config->read && config->write) { + map->reg_read = _regmap_bus_read; + if (config->reg_update_bits) + map->reg_update_bits = config->reg_update_bits; + + /* Bulk read/write */ + map->read = config->read; + map->write = config->write; + + reg_endian = REGMAP_ENDIAN_NATIVE; + val_endian = REGMAP_ENDIAN_NATIVE; + } else if (!bus) { map->reg_read = config->reg_read; map->reg_write = config->reg_write; + map->reg_update_bits = config->reg_update_bits; map->defer_caching = false; goto skip_format_initialization; @@ -864,10 +855,13 @@ struct regmap *__regmap_init(struct device *dev, } else { map->reg_read = _regmap_bus_read; map->reg_update_bits = bus->reg_update_bits; - } + /* Bulk read/write */ + map->read = bus->read; + map->write = bus->write; - reg_endian = regmap_get_reg_endian(bus, config); - val_endian = regmap_get_val_endian(dev, bus, config); + reg_endian = regmap_get_reg_endian(bus, config); + val_endian = regmap_get_val_endian(dev, bus, config); + } switch (config->reg_bits + map->reg_shift) { case 2: @@ -944,44 +938,30 @@ struct regmap *__regmap_init(struct device *dev, break; case 24: - if (reg_endian != REGMAP_ENDIAN_BIG) - goto err_hwlock; - map->format.format_reg = regmap_format_24; - break; - - case 32: switch (reg_endian) { case REGMAP_ENDIAN_BIG: - map->format.format_reg = regmap_format_32_be; - break; - case REGMAP_ENDIAN_LITTLE: - map->format.format_reg = regmap_format_32_le; - break; - case REGMAP_ENDIAN_NATIVE: - map->format.format_reg 
= regmap_format_32_native; + map->format.format_reg = regmap_format_24_be; break; default: goto err_hwlock; } break; -#ifdef CONFIG_64BIT - case 64: + case 32: switch (reg_endian) { case REGMAP_ENDIAN_BIG: - map->format.format_reg = regmap_format_64_be; + map->format.format_reg = regmap_format_32_be; break; case REGMAP_ENDIAN_LITTLE: - map->format.format_reg = regmap_format_64_le; + map->format.format_reg = regmap_format_32_le; break; case REGMAP_ENDIAN_NATIVE: - map->format.format_reg = regmap_format_64_native; + map->format.format_reg = regmap_format_32_native; break; default: goto err_hwlock; } break; -#endif default: goto err_hwlock; @@ -1017,10 +997,14 @@ struct regmap *__regmap_init(struct device *dev, } break; case 24: - if (val_endian != REGMAP_ENDIAN_BIG) + switch (val_endian) { + case REGMAP_ENDIAN_BIG: + map->format.format_val = regmap_format_24_be; + map->format.parse_val = regmap_parse_24_be; + break; + default: goto err_hwlock; - map->format.format_val = regmap_format_24; - map->format.parse_val = regmap_parse_24; + } break; case 32: switch (val_endian) { @@ -1042,28 +1026,6 @@ struct regmap *__regmap_init(struct device *dev, goto err_hwlock; } break; -#ifdef CONFIG_64BIT - case 64: - switch (val_endian) { - case REGMAP_ENDIAN_BIG: - map->format.format_val = regmap_format_64_be; - map->format.parse_val = regmap_parse_64_be; - map->format.parse_inplace = regmap_parse_64_be_inplace; - break; - case REGMAP_ENDIAN_LITTLE: - map->format.format_val = regmap_format_64_le; - map->format.parse_val = regmap_parse_64_le; - map->format.parse_inplace = regmap_parse_64_le_inplace; - break; - case REGMAP_ENDIAN_NATIVE: - map->format.format_val = regmap_format_64_native; - map->format.parse_val = regmap_parse_64_native; - break; - default: - goto err_hwlock; - } - break; -#endif } if (map->format.format_write) { @@ -1100,13 +1062,13 @@ skip_format_initialization: /* Sanity check */ if (range_cfg->range_max < range_cfg->range_min) { - dev_err(map->dev, "Invalid range %d: %d < %d\n", i, + dev_err(map->dev, "Invalid range %d: %u < %u\n", i, range_cfg->range_max, range_cfg->range_min); goto err_range; } if (range_cfg->range_max > map->max_register) { - dev_err(map->dev, "Invalid range %d: %d > %d\n", i, + dev_err(map->dev, "Invalid range %d: %u > %u\n", i, range_cfg->range_max, map->max_register); goto err_range; } @@ -1126,10 +1088,10 @@ skip_format_initialization: /* Make sure, that this register range has no selector or data window within its boundary */ for (j = 0; j < config->num_ranges; j++) { - unsigned sel_reg = config->ranges[j].selector_reg; - unsigned win_min = config->ranges[j].window_start; - unsigned win_max = win_min + - config->ranges[j].window_len - 1; + unsigned int sel_reg = config->ranges[j].selector_reg; + unsigned int win_min = config->ranges[j].window_start; + unsigned int win_max = win_min + + config->ranges[j].window_len - 1; /* Allow data window inside its own virtual range */ if (j == i) @@ -1211,6 +1173,8 @@ err_name: err_map: kfree(map); err: + if (bus && bus->free_on_exit) + kfree(bus); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(__regmap_init); @@ -1253,6 +1217,9 @@ static void regmap_field_init(struct regmap_field *rm_field, rm_field->reg = reg_field.reg; rm_field->shift = reg_field.lsb; rm_field->mask = GENMASK(reg_field.msb, reg_field.lsb); + + WARN_ONCE(rm_field->mask == 0, "invalid empty mask defined\n"); + rm_field->id_size = reg_field.id_size; rm_field->id_offset = reg_field.id_offset; } @@ -1298,7 +1265,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_field_alloc); */ int 
regmap_field_bulk_alloc(struct regmap *regmap, struct regmap_field **rm_field, - struct reg_field *reg_field, + const struct reg_field *reg_field, int num_fields) { struct regmap_field *rf; @@ -1334,7 +1301,7 @@ EXPORT_SYMBOL_GPL(regmap_field_bulk_alloc); int devm_regmap_field_bulk_alloc(struct device *dev, struct regmap *regmap, struct regmap_field **rm_field, - struct reg_field *reg_field, + const struct reg_field *reg_field, int num_fields) { struct regmap_field *rf; @@ -1459,6 +1426,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) regmap_debugfs_exit(map); map->max_register = config->max_register; + map->max_register_is_set = map->max_register ?: config->max_register_is_0; map->writeable_reg = config->writeable_reg; map->readable_reg = config->readable_reg; map->volatile_reg = config->volatile_reg; @@ -1489,7 +1457,9 @@ void regmap_exit(struct regmap *map) { struct regmap_async *async; + regmap_detach_dev(map->dev, map); regcache_exit(map); + regmap_debugfs_exit(map); regmap_range_exit(map); if (map->bus && map->bus->free_context) @@ -1525,7 +1495,7 @@ static int dev_get_regmap_match(struct device *dev, void *res, void *data) /* If the user didn't specify a name match any */ if (data) - return !strcmp((*r)->name, data); + return (*r)->name && !strcmp((*r)->name, data); else return 1; } @@ -1629,6 +1599,18 @@ static void regmap_set_work_buf_flag_mask(struct regmap *map, int max_bytes, buf[i] |= (mask >> (8 * i)) & 0xff; } +static unsigned int regmap_reg_addr(struct regmap *map, unsigned int reg) +{ + reg += map->reg_base; + + if (map->format.reg_shift > 0) + reg >>= map->format.reg_shift; + else if (map->format.reg_shift < 0) + reg <<= -(map->format.reg_shift); + + return reg; +} + static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, const void *val, size_t val_len, bool noinc) { @@ -1641,8 +1623,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, size_t len; int i; - WARN_ON(!map->bus); - /* Check for unwritable or noinc registers in range * before we start */ @@ -1657,17 +1637,19 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, } if (!map->cache_bypass && map->format.parse_val) { - unsigned int ival; + unsigned int ival, offset; int val_bytes = map->format.val_bytes; - for (i = 0; i < val_len / val_bytes; i++) { - ival = map->format.parse_val(val + (i * val_bytes)); - ret = regcache_write(map, - reg + regmap_get_offset(map, i), - ival); + + /* Cache the last written value for noinc writes */ + i = noinc ? val_len - val_bytes : 0; + for (; i < val_len; i += val_bytes) { + ival = map->format.parse_val(val + i); + offset = noinc ? 
0 : regmap_get_offset(map, i / val_bytes); + ret = regcache_write(map, reg + offset, ival); if (ret) { dev_err(map->dev, "Error in caching of register: %x ret: %d\n", - reg + i, ret); + reg + offset, ret); return ret; } } @@ -1708,6 +1690,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, return ret; } + reg = regmap_reg_addr(map, reg); map->format.format_reg(map->work_buf, reg, map->reg_shift); regmap_set_work_buf_flag_mask(map, map->format.reg_bytes, map->write_flag_mask); @@ -1722,7 +1705,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, val = work_val; } - if (map->async && map->bus->async_write) { + if (map->async && map->bus && map->bus->async_write) { struct regmap_async *async; trace_regmap_async_write_start(map, reg, val_len); @@ -1790,11 +1773,11 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, * write. */ if (val == work_val) - ret = map->bus->write(map->bus_context, map->work_buf, - map->format.reg_bytes + - map->format.pad_bytes + - val_len); - else if (map->bus->gather_write) + ret = map->write(map->bus_context, map->work_buf, + map->format.reg_bytes + + map->format.pad_bytes + + val_len); + else if (map->bus && map->bus->gather_write) ret = map->bus->gather_write(map->bus_context, map->work_buf, map->format.reg_bytes + map->format.pad_bytes, @@ -1812,7 +1795,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, memcpy(buf, map->work_buf, map->format.reg_bytes); memcpy(buf + map->format.reg_bytes + map->format.pad_bytes, val, val_len); - ret = map->bus->write(map->bus_context, buf, len); + ret = map->write(map->bus_context, buf, len); kfree(buf); } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { @@ -1835,8 +1818,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, */ bool regmap_can_raw_write(struct regmap *map) { - return map->bus && map->bus->write && map->format.format_val && - map->format.format_reg; + return map->write && map->format.format_val && map->format.format_reg; } EXPORT_SYMBOL_GPL(regmap_can_raw_write); @@ -1869,7 +1851,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, struct regmap_range_node *range; struct regmap *map = context; - WARN_ON(!map->bus || !map->format.format_write); + WARN_ON(!map->format.format_write); range = _regmap_range_lookup(map, reg); if (range) { @@ -1878,12 +1860,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, return ret; } + reg = regmap_reg_addr(map, reg); map->format.format_write(map, reg, val); trace_regmap_hw_write_start(map, reg, 1); - ret = map->bus->write(map->bus_context, map->work_buf, - map->format.buf_size); + ret = map->write(map->bus_context, map->work_buf, map->format.buf_size); trace_regmap_hw_write_done(map, reg, 1); @@ -1894,7 +1876,17 @@ static int _regmap_bus_reg_write(void *context, unsigned int reg, unsigned int val) { struct regmap *map = context; + struct regmap_range_node *range; + int ret; + range = _regmap_range_lookup(map, reg); + if (range) { + ret = _regmap_select_page(map, &reg, range, 1); + if (ret != 0) + return ret; + } + + reg = regmap_reg_addr(map, reg); return map->bus->reg_write(map->bus_context, reg, val); } @@ -1903,7 +1895,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, { struct regmap *map = context; - WARN_ON(!map->bus || !map->format.format_val); + WARN_ON(!map->format.format_val); map->format.format_val(map->work_buf + map->format.reg_bytes + map->format.pad_bytes, val, 0);
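The hunks above route every register number through regmap_reg_addr(), which first adds config->reg_base and then applies config->reg_shift (a positive value shifts the address right, a negative one shifts it left), and they also honour config->use_raw_spinlock and the config-level max_raw_read/max_raw_write limits when no bus provides its own. A minimal, purely illustrative regmap_config sketch using those fields follows; the device and all numbers are invented and not taken from this patch:

static const struct regmap_config example_config = {
	.reg_bits	  = 32,
	.val_bits	  = 32,
	.reg_base	  = 0x8000,	/* added to every register number */
	.reg_shift	  = -2,		/* negative: address shifted left, as in regmap_reg_addr() */
	.fast_io	  = true,	/* required to get the spinlock path at all */
	.use_raw_spinlock = true,	/* pick the raw spinlock variant added above */
	.max_raw_read	  = 256,	/* only used when the bus imposes no limit of its own */
	.max_raw_write	  = 256,
	.max_register	  = 0x3ff,
};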
@@ -1917,7 +1909,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, static inline void *_regmap_map_get_context(struct regmap *map) { - return (map->bus) ? map : map->bus_context; + return (map->bus || (!map->bus && map->read)) ? map : map->bus_context; } int _regmap_write(struct regmap *map, unsigned int reg, @@ -2082,8 +2074,86 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_raw_write); +static int regmap_noinc_readwrite(struct regmap *map, unsigned int reg, + void *val, unsigned int val_len, bool write) +{ + size_t val_bytes = map->format.val_bytes; + size_t val_count = val_len / val_bytes; + unsigned int lastval; + u8 *u8p; + u16 *u16p; + u32 *u32p; + int ret; + int i; + + switch (val_bytes) { + case 1: + u8p = val; + if (write) + lastval = (unsigned int)u8p[val_count - 1]; + break; + case 2: + u16p = val; + if (write) + lastval = (unsigned int)u16p[val_count - 1]; + break; + case 4: + u32p = val; + if (write) + lastval = (unsigned int)u32p[val_count - 1]; + break; + default: + return -EINVAL; + } + + /* + * Update the cache with the last value we write, the rest is just + * gone down in the hardware FIFO. We can't cache FIFOs. This makes + * sure a single read from the cache will work. + */ + if (write) { + if (!map->cache_bypass && !map->defer_caching) { + ret = regcache_write(map, reg, lastval); + if (ret != 0) + return ret; + if (map->cache_only) { + map->cache_dirty = true; + return 0; + } + } + ret = map->bus->reg_noinc_write(map->bus_context, reg, val, val_count); + } else { + ret = map->bus->reg_noinc_read(map->bus_context, reg, val, val_count); + } + + if (!ret && regmap_should_log(map)) { + dev_info(map->dev, "%x %s [", reg, write ? "<=" : "=>"); + for (i = 0; i < val_count; i++) { + switch (val_bytes) { + case 1: + pr_cont("%x", u8p[i]); + break; + case 2: + pr_cont("%x", u16p[i]); + break; + case 4: + pr_cont("%x", u32p[i]); + break; + default: + break; + } + if (i == (val_count - 1)) + pr_cont("]\n"); + else + pr_cont(","); + } + } + + return 0; +} + /** - * regmap_noinc_write(): Write data from a register without incrementing the + * regmap_noinc_write(): Write data to a register without incrementing the * register number * * @map: Register map to write to @@ -2109,10 +2179,8 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg, size_t write_len; int ret; - if (!map->bus) + if (!map->write && !(map->bus && map->bus->reg_noinc_write)) return -EINVAL; - if (!map->bus->write) - return -ENOTSUPP; if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2127,6 +2195,15 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg, goto out_unlock; } + /* + * Use the accelerated operation if we can. The val drops the const + * typing in order to facilitate code reuse in regmap_noinc_readwrite(). + */ + if (map->bus->reg_noinc_write) { + ret = regmap_noinc_readwrite(map, reg, (void *)val, val_len, true); + goto out_unlock; + } + while (val_len) { if (map->max_raw_write && map->max_raw_write < val_len) write_len = map->max_raw_write; @@ -2175,6 +2252,30 @@ int regmap_field_update_bits_base(struct regmap_field *field, EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); /** + * regmap_field_test_bits() - Check if all specified bits are set in a + * register field. 
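The comments above spell out the FIFO semantics: a noinc access transfers the whole buffer to a single register address, and at most the last element is reflected in the register cache. A short illustrative sketch of the caller side, assuming an 8-bit value map; the register number and helper name are invented:

#define EXAMPLE_FIFO_REG	0x24	/* hypothetical FIFO register */

/* Push a block of samples into a device FIFO; the address never increments. */
static int example_push_fifo(struct regmap *map, const u8 *buf, size_t len)
{
	/* After this call the cache entry for EXAMPLE_FIFO_REG holds only buf[len - 1]. */
	return regmap_noinc_write(map, EXAMPLE_FIFO_REG, buf, len);
}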
+ * + * @field: Register field to operate on + * @bits: Bits to test + * + * Returns negative errno if the underlying regmap_field_read() fails, + * 0 if at least one of the tested bits is not set and 1 if all tested + * bits are set. + */ +int regmap_field_test_bits(struct regmap_field *field, unsigned int bits) +{ + unsigned int val; + int ret; + + ret = regmap_field_read(field, &val); + if (ret) + return ret; + + return (val & bits) == bits; +} +EXPORT_SYMBOL_GPL(regmap_field_test_bits); + +/** * regmap_fields_update_bits_base() - Perform a read/modify/write cycle a * register field with port ID * @@ -2232,7 +2333,7 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, * Some devices don't support bulk write, for them we have a series of * single write operations. */ - if (!map->bus || !map->format.parse_inplace) { + if (!map->write || !map->format.parse_inplace) { map->lock(map->lock_arg); for (i = 0; i < val_count; i++) { unsigned int ival; @@ -2247,11 +2348,6 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, case 4: ival = *(u32 *)(val + (i * val_bytes)); break; -#ifdef CONFIG_64BIT - case 8: - ival = *(u64 *)(val + (i * val_bytes)); - break; -#endif default: ret = -EINVAL; goto out; @@ -2268,7 +2364,7 @@ out: } else { void *wval; - wval = kmemdup(val, val_count * val_bytes, map->alloc_flags); + wval = kmemdup_array(val, val_count, val_bytes, map->alloc_flags); if (!wval) return -ENOMEM; @@ -2279,6 +2375,10 @@ out: kfree(wval); } + + if (!ret) + trace_regmap_bulk_write(map, reg, val, val_bytes * val_count); + return ret; } EXPORT_SYMBOL_GPL(regmap_bulk_write); @@ -2319,6 +2419,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, unsigned int reg = regs[i].reg; unsigned int val = regs[i].def; trace_regmap_hw_write_start(map, reg, 1); + reg = regmap_reg_addr(map, reg); map->format.format_reg(u8, reg, map->reg_shift); u8 += reg_bytes + pad_bytes; map->format.format_val(u8, val, 0); @@ -2327,7 +2428,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, u8 = buf; *u8 |= map->write_flag_mask; - ret = map->bus->write(map->bus_context, buf, len); + ret = map->write(map->bus_context, buf, len); kfree(buf); @@ -2633,9 +2734,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, struct regmap_range_node *range; int ret; - WARN_ON(!map->bus); - - if (!map->bus || !map->bus->read) + if (!map->read) return -EINVAL; range = _regmap_range_lookup(map, reg); @@ -2646,14 +2745,15 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, return ret; } + reg = regmap_reg_addr(map, reg); map->format.format_reg(map->work_buf, reg, map->reg_shift); regmap_set_work_buf_flag_mask(map, map->format.reg_bytes, map->read_flag_mask); trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); - ret = map->bus->read(map->bus_context, map->work_buf, - map->format.reg_bytes + map->format.pad_bytes, - val, val_len); + ret = map->read(map->bus_context, map->work_buf, + map->format.reg_bytes + map->format.pad_bytes, + val, val_len); trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); @@ -2664,7 +2764,17 @@ static int _regmap_bus_reg_read(void *context, unsigned int reg, unsigned int *val) { struct regmap *map = context; + struct regmap_range_node *range; + int ret; + range = _regmap_range_lookup(map, reg); + if (range) { + ret = _regmap_select_page(map, &reg, range, 1); + if (ret != 0) + return ret; + } + + reg = regmap_reg_addr(map, reg); return
map->bus->reg_read(map->bus_context, reg, val); } @@ -2746,6 +2856,43 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val) EXPORT_SYMBOL_GPL(regmap_read); /** + * regmap_read_bypassed() - Read a value from a single register direct + * from the device, bypassing the cache + * + * @map: Register map to read from + * @reg: Register to be read from + * @val: Pointer to store read value + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. + */ +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val) +{ + int ret; + bool bypass, cache_only; + + if (!IS_ALIGNED(reg, map->reg_stride)) + return -EINVAL; + + map->lock(map->lock_arg); + + bypass = map->cache_bypass; + cache_only = map->cache_only; + map->cache_bypass = true; + map->cache_only = false; + + ret = _regmap_read(map, reg, val); + + map->cache_bypass = bypass; + map->cache_only = cache_only; + + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_read_bypassed); + +/** * regmap_raw_read() - Read raw data from the device * * @map: Register map to read from @@ -2764,8 +2911,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, unsigned int v; int ret, i; - if (!map->bus) - return -EINVAL; if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2780,7 +2925,12 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t chunk_count, chunk_bytes; size_t chunk_regs = val_count; - if (!map->bus->read) { + if (!map->cache_bypass && map->cache_only) { + ret = -EBUSY; + goto out; + } + + if (!map->read) { ret = -ENOTSUPP; goto out; } @@ -2840,7 +2990,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read); * @val: Pointer to data buffer * @val_len: Length of output buffer in bytes. * - * The regmap API usually assumes that bulk bus read operations will read a + * The regmap API usually assumes that bulk read operations will read a * range of registers. Some devices have certain registers for which a read * operation read will read from an internal FIFO. * @@ -2858,10 +3008,9 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, size_t read_len; int ret; - if (!map->bus) - return -EINVAL; - if (!map->bus->read) + if (!map->read) return -ENOTSUPP; + if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2876,6 +3025,23 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, goto out_unlock; } + /* + * We have not defined the FIFO semantics for cache, as the + * cache is just one value deep. Should we return the last + * written value? Just avoid this by always reading the FIFO + * even when using cache. Cache only will not work. 
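regmap_read_bypassed(), documented just above, temporarily forces cache_bypass on and cache_only off around a single read, so it can reach the hardware even while the map is still cache-only. A hedged usage sketch; the register name is invented and not from this patch:

#define EXAMPLE_CHIP_ID_REG	0x00	/* hypothetical ID register */

static int example_check_id(struct regmap *map)
{
	unsigned int id;
	int ret;

	/* Goes straight to the hardware even while the map is cache-only. */
	ret = regmap_read_bypassed(map, EXAMPLE_CHIP_ID_REG, &id);
	if (ret)
		return ret;

	dev_info(regmap_get_device(map), "chip id %#x\n", id);
	return 0;
}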
+ */ + if (!map->cache_bypass && map->cache_only) { + ret = -EBUSY; + goto out_unlock; + } + + /* Use the accelerated operation if we can */ + if (map->bus->reg_noinc_read) { + ret = regmap_noinc_readwrite(map, reg, val, val_len, false); + goto out_unlock; + } + while (val_len) { if (map->max_raw_read && map->max_raw_read < val_len) read_len = map->max_raw_read; @@ -2952,8 +3118,53 @@ int regmap_fields_read(struct regmap_field *field, unsigned int id, } EXPORT_SYMBOL_GPL(regmap_fields_read); +static int _regmap_bulk_read(struct regmap *map, unsigned int reg, + const unsigned int *regs, void *val, size_t val_count) +{ + u32 *u32 = val; + u16 *u16 = val; + u8 *u8 = val; + int ret, i; + + map->lock(map->lock_arg); + + for (i = 0; i < val_count; i++) { + unsigned int ival; + + if (regs) { + if (!IS_ALIGNED(regs[i], map->reg_stride)) { + ret = -EINVAL; + goto out; + } + ret = _regmap_read(map, regs[i], &ival); + } else { + ret = _regmap_read(map, reg + regmap_get_offset(map, i), &ival); + } + if (ret != 0) + goto out; + + switch (map->format.val_bytes) { + case 4: + u32[i] = ival; + break; + case 2: + u16[i] = ival; + break; + case 1: + u8[i] = ival; + break; + default: + ret = -EINVAL; + goto out; + } + } +out: + map->unlock(map->lock_arg); + return ret; +} + /** - * regmap_bulk_read() - Read multiple registers from the device + * regmap_bulk_read() - Read multiple sequential registers from the device * * @map: Register map to read from * @reg: First register to be read from @@ -2975,7 +3186,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, if (val_count == 0) return -EINVAL; - if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { + if (map->read && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { ret = regmap_raw_read(map, reg, val, val_bytes * val_count); if (ret != 0) return ret; @@ -2983,52 +3194,35 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, for (i = 0; i < val_count * val_bytes; i += val_bytes) map->format.parse_inplace(val + i); } else { -#ifdef CONFIG_64BIT - u64 *u64 = val; -#endif - u32 *u32 = val; - u16 *u16 = val; - u8 *u8 = val; - - map->lock(map->lock_arg); - - for (i = 0; i < val_count; i++) { - unsigned int ival; - - ret = _regmap_read(map, reg + regmap_get_offset(map, i), - &ival); - if (ret != 0) - goto out; - - switch (map->format.val_bytes) { -#ifdef CONFIG_64BIT - case 8: - u64[i] = ival; - break; -#endif - case 4: - u32[i] = ival; - break; - case 2: - u16[i] = ival; - break; - case 1: - u8[i] = ival; - break; - default: - ret = -EINVAL; - goto out; - } - } - -out: - map->unlock(map->lock_arg); + ret = _regmap_bulk_read(map, reg, NULL, val, val_count); } - + if (!ret) + trace_regmap_bulk_read(map, reg, val, val_bytes * val_count); return ret; } EXPORT_SYMBOL_GPL(regmap_bulk_read); +/** + * regmap_multi_reg_read() - Read multiple non-sequential registers from the device + * + * @map: Register map to read from + * @regs: Array of registers to read from + * @val: Pointer to store read value, in native register size for device + * @val_count: Number of registers to read + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. 
+ */ +int regmap_multi_reg_read(struct regmap *map, const unsigned int *regs, void *val, + size_t val_count) +{ + if (val_count == 0) + return -EINVAL; + + return _regmap_bulk_read(map, 0, regs, val, val_count); +} +EXPORT_SYMBOL_GPL(regmap_multi_reg_read); + static int _regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change, bool force_write) @@ -3040,6 +3234,7 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, *change = false; if (regmap_volatile(map, reg) && map->reg_update_bits) { + reg = regmap_reg_addr(map, reg); ret = map->reg_update_bits(map->bus_context, reg, mask, val); if (ret == 0 && change) *change = true; @@ -3051,7 +3246,7 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, tmp = orig & ~mask; tmp |= val & mask; - if (force_write || (tmp != orig)) { + if (force_write || (tmp != orig) || map->force_write_field) { ret = _regmap_write(map, reg, tmp); if (ret == 0 && change) *change = true; @@ -3116,7 +3311,8 @@ EXPORT_SYMBOL_GPL(regmap_update_bits_base); */ int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits) { - unsigned int val, ret; + unsigned int val; + int ret; ret = regmap_read(map, reg, &val); if (ret) @@ -3277,7 +3473,7 @@ EXPORT_SYMBOL_GPL(regmap_get_val_bytes); */ int regmap_get_max_register(struct regmap *map) { - return map->max_register ? map->max_register : -EINVAL; + return map->max_register_is_set ? map->max_register : -EINVAL; } EXPORT_SYMBOL_GPL(regmap_get_max_register); @@ -3295,6 +3491,19 @@ int regmap_get_reg_stride(struct regmap *map) } EXPORT_SYMBOL_GPL(regmap_get_reg_stride); +/** + * regmap_might_sleep() - Returns whether a regmap access might sleep. + * + * @map: Register map to operate on. + * + * Returns true if an access to the register might sleep, else false. 
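regmap_multi_reg_read(), introduced above, takes an arbitrary list of register numbers rather than a contiguous range and fills a buffer in the map's native value width; regmap_might_sleep() can be consulted first if the caller could be in atomic context. An illustrative sketch, assuming a map with 32-bit values and made-up register numbers:

static int example_read_scattered(struct regmap *map, u32 vals[3])
{
	/* Non-sequential registers gathered in one locked pass. */
	static const unsigned int regs[3] = { 0x10, 0x24, 0x80 };

	return regmap_multi_reg_read(map, regs, vals, ARRAY_SIZE(regs));
}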
+ */ +bool regmap_might_sleep(struct regmap *map) +{ + return map->can_sleep; +} +EXPORT_SYMBOL_GPL(regmap_might_sleep); + int regmap_parse_val(struct regmap *map, const void *buf, unsigned int *val) { diff --git a/drivers/base/regmap/trace.h b/drivers/base/regmap/trace.h index 9abee14df9ee..bcc5a8b226a6 100644 --- a/drivers/base/regmap/trace.h +++ b/drivers/base/regmap/trace.h @@ -27,14 +27,12 @@ DECLARE_EVENT_CLASS(regmap_reg, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); + __assign_str(name); __entry->reg = reg; __entry->val = val; ), - TP_printk("%s reg=%x val=%x", __get_str(name), - (unsigned int)__entry->reg, - (unsigned int)__entry->val) + TP_printk("%s reg=%x val=%x", __get_str(name), __entry->reg, __entry->val) ); DEFINE_EVENT(regmap_reg, regmap_reg_write, @@ -43,7 +41,6 @@ DEFINE_EVENT(regmap_reg, regmap_reg_write, unsigned int val), TP_ARGS(map, reg, val) - ); DEFINE_EVENT(regmap_reg, regmap_reg_read, @@ -52,7 +49,6 @@ DEFINE_EVENT(regmap_reg, regmap_reg_read, unsigned int val), TP_ARGS(map, reg, val) - ); DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, @@ -61,7 +57,47 @@ DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, unsigned int val), TP_ARGS(map, reg, val) +); + +DECLARE_EVENT_CLASS(regmap_bulk, + TP_PROTO(struct regmap *map, unsigned int reg, + const void *val, int val_len), + + TP_ARGS(map, reg, val, val_len), + + TP_STRUCT__entry( + __string(name, regmap_name(map)) + __field(unsigned int, reg) + __dynamic_array(char, buf, val_len) + __field(int, val_len) + ), + + TP_fast_assign( + __assign_str(name); + __entry->reg = reg; + __entry->val_len = val_len; + memcpy(__get_dynamic_array(buf), val, val_len); + ), + + TP_printk("%s reg=%x val=%s", __get_str(name), __entry->reg, + __print_hex(__get_dynamic_array(buf), __entry->val_len)) +); + +DEFINE_EVENT(regmap_bulk, regmap_bulk_write, + + TP_PROTO(struct regmap *map, unsigned int reg, + const void *val, int val_len), + + TP_ARGS(map, reg, val, val_len) +); + +DEFINE_EVENT(regmap_bulk, regmap_bulk_read, + + TP_PROTO(struct regmap *map, unsigned int reg, + const void *val, int val_len), + + TP_ARGS(map, reg, val, val_len) ); DECLARE_EVENT_CLASS(regmap_block, @@ -77,14 +113,12 @@ DECLARE_EVENT_CLASS(regmap_block, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); + __assign_str(name); __entry->reg = reg; __entry->count = count; ), - TP_printk("%s reg=%x count=%d", __get_str(name), - (unsigned int)__entry->reg, - (int)__entry->count) + TP_printk("%s reg=%x count=%d", __get_str(name), __entry->reg, __entry->count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_start, @@ -129,9 +163,9 @@ TRACE_EVENT(regcache_sync, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); - __assign_str(status, status); - __assign_str(type, type); + __assign_str(name); + __assign_str(status); + __assign_str(type); ), TP_printk("%s type=%s status=%s", __get_str(name), @@ -150,12 +184,11 @@ DECLARE_EVENT_CLASS(regmap_bool, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); + __assign_str(name); __entry->flag = flag; ), - TP_printk("%s flag=%d", __get_str(name), - (int)__entry->flag) + TP_printk("%s flag=%d", __get_str(name), __entry->flag) ); DEFINE_EVENT(regmap_bool, regmap_cache_only, @@ -163,7 +196,6 @@ DEFINE_EVENT(regmap_bool, regmap_cache_only, TP_PROTO(struct regmap *map, bool flag), TP_ARGS(map, flag) - ); DEFINE_EVENT(regmap_bool, regmap_cache_bypass, @@ -171,7 +203,6 @@ DEFINE_EVENT(regmap_bool, regmap_cache_bypass, TP_PROTO(struct regmap *map, bool flag), TP_ARGS(map, flag) - ); DECLARE_EVENT_CLASS(regmap_async, @@ 
-185,7 +216,7 @@ DECLARE_EVENT_CLASS(regmap_async, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); + __assign_str(name); ), TP_printk("%s", __get_str(name)) @@ -203,7 +234,6 @@ DEFINE_EVENT(regmap_async, regmap_async_io_complete, TP_PROTO(struct regmap *map), TP_ARGS(map) - ); DEFINE_EVENT(regmap_async, regmap_async_complete_start, @@ -211,7 +241,6 @@ DEFINE_EVENT(regmap_async, regmap_async_complete_start, TP_PROTO(struct regmap *map), TP_ARGS(map) - ); DEFINE_EVENT(regmap_async, regmap_async_complete_done, @@ -219,7 +248,6 @@ DEFINE_EVENT(regmap_async, regmap_async_complete_done, TP_PROTO(struct regmap *map), TP_ARGS(map) - ); TRACE_EVENT(regcache_drop_region, @@ -236,13 +264,12 @@ TRACE_EVENT(regcache_drop_region, ), TP_fast_assign( - __assign_str(name, regmap_name(map)); + __assign_str(name); __entry->from = from; __entry->to = to; ), - TP_printk("%s %u-%u", __get_str(name), (unsigned int)__entry->from, - (unsigned int)__entry->to) + TP_printk("%s %u-%u", __get_str(name), __entry->from, __entry->to) ); #endif /* _TRACE_REGMAP_H */ diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 0af5363a582c..282c38aece0d 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include <linux/init.h> +#include <linux/of.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/idr.h> @@ -27,9 +28,10 @@ struct soc_device { int soc_dev_num; }; -static struct bus_type soc_bus_type = { +static const struct bus_type soc_bus_type = { .name = "soc", }; +static bool soc_bus_registered; static DEVICE_ATTR(machine, 0444, soc_info_show, NULL); static DEVICE_ATTR(family, 0444, soc_info_show, NULL); @@ -104,11 +106,23 @@ static void soc_release(struct device *dev) { struct soc_device *soc_dev = container_of(dev, struct soc_device, dev); - ida_simple_remove(&soc_ida, soc_dev->soc_dev_num); + ida_free(&soc_ida, soc_dev->soc_dev_num); kfree(soc_dev->dev.groups); kfree(soc_dev); } +static void soc_device_get_machine(struct soc_device_attribute *soc_dev_attr) +{ + struct device_node *np; + + if (soc_dev_attr->machine) + return; + + np = of_find_node_by_path("/"); + of_property_read_string(np, "model", &soc_dev_attr->machine); + of_node_put(np); +} + static struct soc_device_attribute *early_soc_dev_attr; struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr) @@ -117,7 +131,9 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr const struct attribute_group **soc_attr_groups; int ret; - if (!soc_bus_type.p) { + soc_device_get_machine(soc_dev_attr); + + if (!soc_bus_registered) { if (early_soc_dev_attr) return ERR_PTR(-EBUSY); early_soc_dev_attr = soc_dev_attr; @@ -139,7 +155,7 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr soc_attr_groups[1] = soc_dev_attr->custom_attr_group; /* Fetch a unique (reclaimable) SOC ID. 
*/ - ret = ida_simple_get(&soc_ida, 0, 0, GFP_KERNEL); + ret = ida_alloc(&soc_ida, GFP_KERNEL); if (ret < 0) goto out3; soc_dev->soc_dev_num = ret; @@ -183,6 +199,7 @@ static int __init soc_bus_register(void) ret = bus_register(&soc_bus_type); if (ret) return ret; + soc_bus_registered = true; if (early_soc_dev_attr) return PTR_ERR(soc_device_register(early_soc_dev_attr)); @@ -241,15 +258,13 @@ static int soc_device_match_one(struct device *dev, void *arg) const struct soc_device_attribute *soc_device_match( const struct soc_device_attribute *matches) { - int ret = 0; + int ret; if (!matches) return NULL; - while (!ret) { - if (!(matches->machine || matches->family || - matches->revision || matches->soc_id)) - break; + while (matches->machine || matches->family || matches->revision || + matches->soc_id) { ret = bus_for_each_dev(&soc_bus_type, NULL, (void *)matches, soc_device_match_one); if (ret < 0 && early_soc_dev_attr) @@ -257,10 +272,10 @@ const struct soc_device_attribute *soc_device_match( matches); if (ret < 0) return NULL; - if (!ret) - matches++; - else + if (ret) return matches; + + matches++; } return NULL; } diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index d1f1a8240120..16a8301c25d6 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -6,10 +6,23 @@ * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com> */ +#include <linux/container_of.h> #include <linux/device.h> -#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/kstrtox.h> +#include <linux/list.h> #include <linux/property.h> #include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/types.h> + +#include "base.h" struct swnode { struct kobject kobj; @@ -411,9 +424,6 @@ software_node_get_name(const struct fwnode_handle *fwnode) { const struct swnode *swnode = to_swnode(fwnode); - if (!swnode) - return "(null)"; - return kobject_name(&swnode->kobj); } @@ -505,9 +515,6 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, int error; int i; - if (!swnode) - return -ENOENT; - prop = property_entry_get(swnode->node->properties, propname); if (!prop) return -ENOENT; @@ -522,20 +529,35 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, if (prop->is_inline) return -EINVAL; - if (index * sizeof(*ref) >= prop->length) + if ((index + 1) * sizeof(*ref) > prop->length) return -ENOENT; ref_array = prop->pointer; ref = &ref_array[index]; - refnode = software_node_fwnode(ref->node); + /* + * A software node can reference other software nodes or firmware + * nodes (which are the abstraction layer sitting on top of them). + * This is done to ensure we can create references to static software + * nodes before they're registered with the firmware node framework. + * At the time the reference is being resolved, we expect the swnodes + * in question to already have been registered and to be backed by + * a firmware node. This is why we use the fwnode API below to read the + * relevant properties and bump the reference count. 
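To illustrate the reference handling described in the comment above: a static software node can carry a reference property that points at another static node, and that reference only resolves once the target has been registered and is backed by a fwnode. A sketch assuming the PROPERTY_ENTRY_REF() helper from linux/property.h; all names here are invented:

static const struct software_node example_ctrl_node = {
	.name = "example-ctrl",
};

static const struct property_entry example_consumer_props[] = {
	/* reference to example_ctrl_node plus one argument (e.g. a channel index) */
	PROPERTY_ENTRY_REF("example-ctrl", &example_ctrl_node, 1),
	{ }
};

static const struct software_node example_consumer_node = {
	.name = "example-consumer",
	.properties = example_consumer_props,
};

Resolving such a reference with fwnode_property_get_reference_args() then goes through the path above, reading the nargs property from the target via the fwnode API and taking a reference on it.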
+ */ + + if (ref->swnode) + refnode = software_node_fwnode(ref->swnode); + else if (ref->fwnode) + refnode = ref->fwnode; + else + return -EINVAL; + if (!refnode) return -ENOENT; if (nargs_prop) { - error = property_entry_read_int_array(swnode->node->properties, - nargs_prop, sizeof(u32), - &nargs_prop_val, 1); + error = fwnode_property_read_u32(refnode, nargs_prop, &nargs_prop_val); if (error) return error; @@ -545,7 +567,10 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, if (nargs > NR_FWNODE_REFERENCE_ARGS) return -EINVAL; - args->fwnode = software_node_get(refnode); + if (!args) + return 0; + + args->fwnode = fwnode_handle_get(refnode); args->nargs = nargs; for (i = 0; i < nargs; i++) @@ -625,7 +650,10 @@ software_node_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) ref = prop->pointer; - return software_node_get(software_node_fwnode(ref[0].node)); + if (!ref->swnode) + return NULL; + + return software_node_get(software_node_fwnode(ref->swnode)); } static struct fwnode_handle * @@ -667,6 +695,7 @@ static const struct fwnode_operations software_node_ops = { .get = software_node_get, .put = software_node_put, .property_present = software_node_property_present, + .property_read_bool = software_node_property_present, .property_read_int_array = software_node_read_int_array, .property_read_string_array = software_node_read_string_array, .get_name = software_node_get_name, @@ -751,10 +780,10 @@ static void software_node_release(struct kobject *kobj) struct swnode *swnode = kobj_to_swnode(kobj); if (swnode->parent) { - ida_simple_remove(&swnode->parent->child_ids, swnode->id); + ida_free(&swnode->parent->child_ids, swnode->id); list_del(&swnode->entry); } else { - ida_simple_remove(&swnode_root_ids, swnode->id); + ida_free(&swnode_root_ids, swnode->id); } if (swnode->allocated) @@ -764,7 +793,7 @@ static void software_node_release(struct kobject *kobj) kfree(swnode); } -static struct kobj_type software_node_type = { +static const struct kobj_type software_node_type = { .release = software_node_release, .sysfs_ops = &kobj_sysfs_ops, }; @@ -780,8 +809,8 @@ swnode_register(const struct software_node *node, struct swnode *parent, if (!swnode) return ERR_PTR(-ENOMEM); - ret = ida_simple_get(parent ? &parent->child_ids : &swnode_root_ids, - 0, 0, GFP_KERNEL); + ret = ida_alloc(parent ? &parent->child_ids : &swnode_root_ids, + GFP_KERNEL); if (ret < 0) { kfree(swnode); return ERR_PTR(ret); @@ -824,67 +853,6 @@ swnode_register(const struct software_node *node, struct swnode *parent, } /** - * software_node_register_nodes - Register an array of software nodes - * @nodes: Zero terminated array of software nodes to be registered - * - * Register multiple software nodes at once. If any node in the array - * has its .parent pointer set (which can only be to another software_node), - * then its parent **must** have been registered before it is; either outside - * of this function or by ordering the array such that parent comes before - * child. 
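As a concrete illustration of the grouped registration just described, parents simply have to precede their children in the NULL-terminated array; the names are invented:

static const struct software_node example_parent = {
	.name = "example-parent",
};

static const struct software_node example_child = {
	.name = "example-child",
	.parent = &example_parent,
};

static const struct software_node *example_group[] = {
	&example_parent,
	&example_child,
	NULL
};

A driver would then call software_node_register_node_group(example_group) once, and software_node_unregister_node_group(example_group) on teardown.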
- */ -int software_node_register_nodes(const struct software_node *nodes) -{ - int ret; - int i; - - for (i = 0; nodes[i].name; i++) { - const struct software_node *parent = nodes[i].parent; - - if (parent && !software_node_to_swnode(parent)) { - ret = -EINVAL; - goto err_unregister_nodes; - } - - ret = software_node_register(&nodes[i]); - if (ret) - goto err_unregister_nodes; - } - - return 0; - -err_unregister_nodes: - software_node_unregister_nodes(nodes); - return ret; -} -EXPORT_SYMBOL_GPL(software_node_register_nodes); - -/** - * software_node_unregister_nodes - Unregister an array of software nodes - * @nodes: Zero terminated array of software nodes to be unregistered - * - * Unregister multiple software nodes at once. If parent pointers are set up - * in any of the software nodes then the array **must** be ordered such that - * parents come before their children. - * - * NOTE: If you are uncertain whether the array is ordered such that - * parents will be unregistered before their children, it is wiser to - * remove the nodes individually, in the correct order (child before - * parent). - */ -void software_node_unregister_nodes(const struct software_node *nodes) -{ - unsigned int i = 0; - - while (nodes[i].name) - i++; - - while (i--) - software_node_unregister(&nodes[i]); -} -EXPORT_SYMBOL_GPL(software_node_unregister_nodes); - -/** * software_node_register_node_group - Register a group of software nodes * @node_group: NULL terminated array of software node pointers to be registered * @@ -894,7 +862,7 @@ EXPORT_SYMBOL_GPL(software_node_unregister_nodes); * of this function or by ordering the array such that parent comes before * child. */ -int software_node_register_node_group(const struct software_node **node_group) +int software_node_register_node_group(const struct software_node * const *node_group) { unsigned int i; int ret; @@ -927,8 +895,7 @@ EXPORT_SYMBOL_GPL(software_node_register_node_group); * remove the nodes individually, in the correct order (child before * parent). */ -void software_node_unregister_node_group( - const struct software_node **node_group) +void software_node_unregister_node_group(const struct software_node * const *node_group) { unsigned int i = 0; @@ -1053,7 +1020,7 @@ int device_add_software_node(struct device *dev, const struct software_node *nod * balance. 
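device_add_software_node(), touched in the hunk around this point, attaches such a static node to a device as its secondary fwnode. A minimal illustrative pairing with device_remove_software_node(); the property and function names are invented:

static const struct property_entry example_dev_props[] = {
	PROPERTY_ENTRY_U32("example,poll-interval-ms", 50),
	{ }
};

static const struct software_node example_dev_node = {
	.properties = example_dev_props,
};

static int example_probe(struct device *dev)
{
	return device_add_software_node(dev, &example_dev_node);
}

static void example_remove(struct device *dev)
{
	device_remove_software_node(dev);
}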
*/ if (device_is_registered(dev)) - software_node_notify(dev, KOBJ_ADD); + software_node_notify(dev); return 0; } @@ -1074,7 +1041,8 @@ void device_remove_software_node(struct device *dev) return; if (device_is_registered(dev)) - software_node_notify(dev, KOBJ_REMOVE); + software_node_notify_remove(dev); + set_secondary_fwnode(dev, NULL); kobject_put(&swnode->kobj); } @@ -1113,48 +1081,50 @@ int device_create_managed_software_node(struct device *dev, to_swnode(fwnode)->managed = true; set_secondary_fwnode(dev, fwnode); + if (device_is_registered(dev)) + software_node_notify(dev); + return 0; } EXPORT_SYMBOL_GPL(device_create_managed_software_node); -int software_node_notify(struct device *dev, unsigned long action) +void software_node_notify(struct device *dev) { struct swnode *swnode; int ret; swnode = dev_to_swnode(dev); if (!swnode) - return 0; + return; - switch (action) { - case KOBJ_ADD: - ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); - if (ret) - break; + kobject_get(&swnode->kobj); + ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); + if (ret) + return; - ret = sysfs_create_link(&swnode->kobj, &dev->kobj, - dev_name(dev)); - if (ret) { - sysfs_remove_link(&dev->kobj, "software_node"); - break; - } - kobject_get(&swnode->kobj); - break; - case KOBJ_REMOVE: - sysfs_remove_link(&swnode->kobj, dev_name(dev)); + ret = sysfs_create_link(&swnode->kobj, &dev->kobj, dev_name(dev)); + if (ret) { sysfs_remove_link(&dev->kobj, "software_node"); - kobject_put(&swnode->kobj); - - if (swnode->managed) { - set_secondary_fwnode(dev, NULL); - kobject_put(&swnode->kobj); - } - break; - default: - break; + return; } +} - return 0; +void software_node_notify_remove(struct device *dev) +{ + struct swnode *swnode; + + swnode = dev_to_swnode(dev); + if (!swnode) + return; + + sysfs_remove_link(&swnode->kobj, dev_name(dev)); + sysfs_remove_link(&dev->kobj, "software_node"); + kobject_put(&swnode->kobj); + + if (swnode->managed) { + set_secondary_fwnode(dev, NULL); + kobject_put(&swnode->kobj); + } } static int __init software_node_init(void) diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index 13db1f78d2ce..483adb796654 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -11,32 +11,32 @@ #include <linux/suspend.h> #include <trace/events/power.h> -static LIST_HEAD(syscore_ops_list); -static DEFINE_MUTEX(syscore_ops_lock); +static LIST_HEAD(syscore_list); +static DEFINE_MUTEX(syscore_lock); /** - * register_syscore_ops - Register a set of system core operations. - * @ops: System core operations to register. + * register_syscore - Register a set of system core operations. + * @syscore: System core operations to register. */ -void register_syscore_ops(struct syscore_ops *ops) +void register_syscore(struct syscore *syscore) { - mutex_lock(&syscore_ops_lock); - list_add_tail(&ops->node, &syscore_ops_list); - mutex_unlock(&syscore_ops_lock); + mutex_lock(&syscore_lock); + list_add_tail(&syscore->node, &syscore_list); + mutex_unlock(&syscore_lock); } -EXPORT_SYMBOL_GPL(register_syscore_ops); +EXPORT_SYMBOL_GPL(register_syscore); /** - * unregister_syscore_ops - Unregister a set of system core operations. - * @ops: System core operations to unregister. + * unregister_syscore - Unregister a set of system core operations. + * @syscore: System core operations to unregister. 
*/ -void unregister_syscore_ops(struct syscore_ops *ops) +void unregister_syscore(struct syscore *syscore) { - mutex_lock(&syscore_ops_lock); - list_del(&ops->node); - mutex_unlock(&syscore_ops_lock); + mutex_lock(&syscore_lock); + list_del(&syscore->node); + mutex_unlock(&syscore_lock); } -EXPORT_SYMBOL_GPL(unregister_syscore_ops); +EXPORT_SYMBOL_GPL(unregister_syscore); #ifdef CONFIG_PM_SLEEP /** @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(unregister_syscore_ops); */ int syscore_suspend(void) { - struct syscore_ops *ops; + struct syscore *syscore; int ret = 0; trace_suspend_resume(TPS("syscore_suspend"), 0, true); @@ -59,25 +59,27 @@ int syscore_suspend(void) WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core suspend.\n"); - list_for_each_entry_reverse(ops, &syscore_ops_list, node) - if (ops->suspend) { - pm_pr_dbg("Calling %pS\n", ops->suspend); - ret = ops->suspend(); + list_for_each_entry_reverse(syscore, &syscore_list, node) + if (syscore->ops->suspend) { + pm_pr_dbg("Calling %pS\n", syscore->ops->suspend); + ret = syscore->ops->suspend(syscore->data); if (ret) goto err_out; WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pS\n", ops->suspend); + "Interrupts enabled after %pS\n", + syscore->ops->suspend); } trace_suspend_resume(TPS("syscore_suspend"), 0, false); return 0; err_out: - pr_err("PM: System core suspend callback %pS failed.\n", ops->suspend); + pr_err("PM: System core suspend callback %pS failed.\n", + syscore->ops->suspend); - list_for_each_entry_continue(ops, &syscore_ops_list, node) - if (ops->resume) - ops->resume(); + list_for_each_entry_continue(syscore, &syscore_list, node) + if (syscore->ops->resume) + syscore->ops->resume(syscore->data); return ret; } @@ -90,18 +92,19 @@ EXPORT_SYMBOL_GPL(syscore_suspend); */ void syscore_resume(void) { - struct syscore_ops *ops; + struct syscore *syscore; trace_suspend_resume(TPS("syscore_resume"), 0, true); WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core resume.\n"); - list_for_each_entry(ops, &syscore_ops_list, node) - if (ops->resume) { - pm_pr_dbg("Calling %pS\n", ops->resume); - ops->resume(); + list_for_each_entry(syscore, &syscore_list, node) + if (syscore->ops->resume) { + pm_pr_dbg("Calling %pS\n", syscore->ops->resume); + syscore->ops->resume(syscore->data); WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pS\n", ops->resume); + "Interrupts enabled after %pS\n", + syscore->ops->resume); } trace_suspend_resume(TPS("syscore_resume"), 0, false); } @@ -113,16 +116,17 @@ EXPORT_SYMBOL_GPL(syscore_resume); */ void syscore_shutdown(void) { - struct syscore_ops *ops; + struct syscore *syscore; - mutex_lock(&syscore_ops_lock); + mutex_lock(&syscore_lock); - list_for_each_entry_reverse(ops, &syscore_ops_list, node) - if (ops->shutdown) { + list_for_each_entry_reverse(syscore, &syscore_list, node) + if (syscore->ops->shutdown) { if (initcall_debug) - pr_info("PM: Calling %pS\n", ops->shutdown); - ops->shutdown(); + pr_info("PM: Calling %pS\n", + syscore->ops->shutdown); + syscore->ops->shutdown(syscore->data); } - mutex_unlock(&syscore_ops_lock); + mutex_unlock(&syscore_lock); } diff --git a/drivers/base/test/.kunitconfig b/drivers/base/test/.kunitconfig new file mode 100644 index 000000000000..473923f0998b --- /dev/null +++ b/drivers/base/test/.kunitconfig @@ -0,0 +1,2 @@ +CONFIG_KUNIT=y +CONFIG_DM_KUNIT_TEST=y diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig index 2f3fa31a948e..2756870615cc 100644 --- a/drivers/base/test/Kconfig +++ b/drivers/base/test/Kconfig @@ -8,7 
+8,13 @@ config TEST_ASYNC_DRIVER_PROBE The module name will be test_async_driver_probe.ko If unsure say N. + +config DM_KUNIT_TEST + tristate "KUnit Tests for the device model" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + config DRIVER_PE_KUNIT_TEST - bool "KUnit Tests for property entry API" if !KUNIT_ALL_TESTS - depends on KUNIT=y + tristate "KUnit Tests for property entry API" if !KUNIT_ALL_TESTS + depends on KUNIT default KUNIT_ALL_TESTS diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile index 64b2f3d744d5..e321dfc7e922 100644 --- a/drivers/base/test/Makefile +++ b/drivers/base/test/Makefile @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o +obj-$(CONFIG_DM_KUNIT_TEST) += root-device-test.o +obj-$(CONFIG_DM_KUNIT_TEST) += platform-device-test.o + obj-$(CONFIG_DRIVER_PE_KUNIT_TEST) += property-entry-test.o -CFLAGS_REMOVE_property-entry-test.o += -fplugin-arg-structleak_plugin-byref -fplugin-arg-structleak_plugin-byref-all +CFLAGS_property-entry-test.o += $(DISABLE_STRUCTLEAK_PLUGIN) diff --git a/drivers/base/test/platform-device-test.c b/drivers/base/test/platform-device-test.c new file mode 100644 index 000000000000..6355a2231b74 --- /dev/null +++ b/drivers/base/test/platform-device-test.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/platform_device.h> +#include <kunit/resource.h> + +#include <linux/device.h> +#include <linux/device/bus.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> + +#define DEVICE_NAME "test" + +struct test_priv { + bool probe_done; + bool release_done; + wait_queue_head_t probe_wq; + wait_queue_head_t release_wq; + struct device *dev; +}; + +static int platform_device_devm_init(struct kunit *test) +{ + struct test_priv *priv; + + priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); + init_waitqueue_head(&priv->probe_wq); + init_waitqueue_head(&priv->release_wq); + + test->priv = priv; + + return 0; +} + +static void devm_device_action(void *ptr) +{ + struct test_priv *priv = ptr; + + priv->release_done = true; + wake_up_interruptible(&priv->release_wq); +} + +static void devm_put_device_action(void *ptr) +{ + struct test_priv *priv = ptr; + + put_device(priv->dev); + priv->release_done = true; + wake_up_interruptible(&priv->release_wq); +} + +#define RELEASE_TIMEOUT_MS 100 + +/* + * Tests that a platform bus, non-probed device will run its + * device-managed actions when unregistered. + */ +static void platform_device_devm_register_unregister_test(struct kunit *test) +{ + struct platform_device *pdev; + struct test_priv *priv = test->priv; + int ret; + + pdev = platform_device_alloc(DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + ret = platform_device_add(pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + priv->dev = &pdev->dev; + + ret = devm_add_action_or_reset(priv->dev, devm_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + platform_device_unregister(pdev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); +} + +/* + * Tests that a platform bus, non-probed device will run its + * device-managed actions when unregistered, even if someone still holds + * a reference to it. 
+ */ +static void platform_device_devm_register_get_unregister_with_devm_test(struct kunit *test) +{ + struct platform_device *pdev; + struct test_priv *priv = test->priv; + int ret; + + pdev = platform_device_alloc(DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + ret = platform_device_add(pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + priv->dev = &pdev->dev; + + get_device(priv->dev); + + ret = devm_add_action_or_reset(priv->dev, devm_put_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + platform_device_unregister(pdev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); +} + +static int fake_probe(struct platform_device *pdev) +{ + struct test_priv *priv = platform_get_drvdata(pdev); + + priv->probe_done = true; + wake_up_interruptible(&priv->probe_wq); + + return 0; +} + +static struct platform_driver fake_driver = { + .probe = fake_probe, + .driver = { + .name = DEVICE_NAME, + }, +}; + +/* + * Tests that a platform bus, probed device will run its device-managed + * actions when unregistered. + */ +static void probed_platform_device_devm_register_unregister_test(struct kunit *test) +{ + struct platform_device *pdev; + struct test_priv *priv = test->priv; + int ret; + + ret = platform_driver_register(&fake_driver); + KUNIT_ASSERT_EQ(test, ret, 0); + + pdev = platform_device_alloc(DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + priv->dev = &pdev->dev; + platform_set_drvdata(pdev, priv); + + ret = platform_device_add(pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = wait_event_interruptible_timeout(priv->probe_wq, priv->probe_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_ASSERT_GT(test, ret, 0); + + ret = devm_add_action_or_reset(priv->dev, devm_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + platform_device_unregister(pdev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); + + platform_driver_unregister(&fake_driver); +} + +/* + * Tests that a platform bus, probed device will run its device-managed + * actions when unregistered, even if someone still holds a reference to + * it. 
+ */ +static void probed_platform_device_devm_register_get_unregister_with_devm_test(struct kunit *test) +{ + struct platform_device *pdev; + struct test_priv *priv = test->priv; + int ret; + + ret = platform_driver_register(&fake_driver); + KUNIT_ASSERT_EQ(test, ret, 0); + + pdev = platform_device_alloc(DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + priv->dev = &pdev->dev; + platform_set_drvdata(pdev, priv); + + ret = platform_device_add(pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = wait_event_interruptible_timeout(priv->probe_wq, priv->probe_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_ASSERT_GT(test, ret, 0); + + get_device(priv->dev); + + ret = devm_add_action_or_reset(priv->dev, devm_put_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + platform_device_unregister(pdev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); + + platform_driver_unregister(&fake_driver); +} + +static struct kunit_case platform_device_devm_tests[] = { + KUNIT_CASE(platform_device_devm_register_unregister_test), + KUNIT_CASE(platform_device_devm_register_get_unregister_with_devm_test), + KUNIT_CASE(probed_platform_device_devm_register_unregister_test), + KUNIT_CASE(probed_platform_device_devm_register_get_unregister_with_devm_test), + {} +}; + +static struct kunit_suite platform_device_devm_test_suite = { + .name = "platform-device-devm", + .init = platform_device_devm_init, + .test_cases = platform_device_devm_tests, +}; + +static void platform_device_find_by_null_test(struct kunit *test) +{ + struct platform_device *pdev; + int ret; + + pdev = kunit_platform_device_alloc(test, DEVICE_NAME, PLATFORM_DEVID_NONE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); + + ret = kunit_platform_device_add(test, pdev); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_PTR_EQ(test, of_find_device_by_node(NULL), NULL); + + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_of_node(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_fwnode(&platform_bus_type, NULL), NULL); + KUNIT_EXPECT_PTR_EQ(test, bus_find_device_by_acpi_dev(&platform_bus_type, NULL), NULL); + + KUNIT_EXPECT_FALSE(test, device_match_of_node(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_fwnode(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_dev(&pdev->dev, NULL)); + KUNIT_EXPECT_FALSE(test, device_match_acpi_handle(&pdev->dev, NULL)); +} + +static struct kunit_case platform_device_match_tests[] = { + KUNIT_CASE(platform_device_find_by_null_test), + {} +}; + +static struct kunit_suite platform_device_match_test_suite = { + .name = "platform-device-match", + .test_cases = platform_device_match_tests, +}; + +kunit_test_suites( + &platform_device_devm_test_suite, + &platform_device_match_test_suite, +); + +MODULE_DESCRIPTION("Test module for platform devices"); +MODULE_AUTHOR("Maxime Ripard <mripard@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/test/property-entry-test.c b/drivers/base/test/property-entry-test.c index 6071d5bc128c..a8657eb06f94 100644 --- a/drivers/base/test/property-entry-test.c +++ b/drivers/base/test/property-entry-test.c @@ -405,20 +405,18 @@ static void pe_test_move_inline_str(struct kunit *test) /* Handling of reference properties */ static void pe_test_reference(struct kunit *test) { - static const struct software_node nodes[] = { - { .name = "1", }, - { .name = "2", }, - { } - }; + static const struct 
software_node node1 = { .name = "1" }; + static const struct software_node node2 = { .name = "2" }; + static const struct software_node *group[] = { &node1, &node2, NULL }; static const struct software_node_ref_args refs[] = { - SOFTWARE_NODE_REFERENCE(&nodes[0]), - SOFTWARE_NODE_REFERENCE(&nodes[1], 3, 4), + SOFTWARE_NODE_REFERENCE(&node1), + SOFTWARE_NODE_REFERENCE(&node2, 3, 4), }; const struct property_entry entries[] = { - PROPERTY_ENTRY_REF("ref-1", &nodes[0]), - PROPERTY_ENTRY_REF("ref-2", &nodes[1], 1, 2), + PROPERTY_ENTRY_REF("ref-1", &node1), + PROPERTY_ENTRY_REF("ref-2", &node2, 1, 2), PROPERTY_ENTRY_REF_ARRAY("ref-3", refs), { } }; @@ -427,7 +425,7 @@ static void pe_test_reference(struct kunit *test) struct fwnode_reference_args ref; int error; - error = software_node_register_nodes(nodes); + error = software_node_register_node_group(group); KUNIT_ASSERT_EQ(test, error, 0); node = fwnode_create_software_node(entries, NULL); @@ -436,7 +434,7 @@ static void pe_test_reference(struct kunit *test) error = fwnode_property_get_reference_args(node, "ref-1", NULL, 0, 0, &ref); KUNIT_ASSERT_EQ(test, error, 0); - KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[0]); + KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &node1); KUNIT_EXPECT_EQ(test, ref.nargs, 0U); /* wrong index */ @@ -447,7 +445,7 @@ static void pe_test_reference(struct kunit *test) error = fwnode_property_get_reference_args(node, "ref-2", NULL, 1, 0, &ref); KUNIT_ASSERT_EQ(test, error, 0); - KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]); + KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &node2); KUNIT_EXPECT_EQ(test, ref.nargs, 1U); KUNIT_EXPECT_EQ(test, ref.args[0], 1LLU); @@ -455,7 +453,7 @@ static void pe_test_reference(struct kunit *test) error = fwnode_property_get_reference_args(node, "ref-2", NULL, 3, 0, &ref); KUNIT_ASSERT_EQ(test, error, 0); - KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]); + KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &node2); KUNIT_EXPECT_EQ(test, ref.nargs, 3U); KUNIT_EXPECT_EQ(test, ref.args[0], 1LLU); KUNIT_EXPECT_EQ(test, ref.args[1], 2LLU); @@ -470,14 +468,14 @@ static void pe_test_reference(struct kunit *test) error = fwnode_property_get_reference_args(node, "ref-3", NULL, 0, 0, &ref); KUNIT_ASSERT_EQ(test, error, 0); - KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[0]); + KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &node1); KUNIT_EXPECT_EQ(test, ref.nargs, 0U); /* second reference in the array */ error = fwnode_property_get_reference_args(node, "ref-3", NULL, 2, 1, &ref); KUNIT_ASSERT_EQ(test, error, 0); - KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &nodes[1]); + KUNIT_EXPECT_PTR_EQ(test, to_software_node(ref.fwnode), &node2); KUNIT_EXPECT_EQ(test, ref.nargs, 2U); KUNIT_EXPECT_EQ(test, ref.args[0], 3LLU); KUNIT_EXPECT_EQ(test, ref.args[1], 4LLU); @@ -488,7 +486,7 @@ static void pe_test_reference(struct kunit *test) KUNIT_EXPECT_NE(test, error, 0); fwnode_remove_software_node(node); - software_node_unregister_nodes(nodes); + software_node_unregister_node_group(group); } static struct kunit_case property_entry_test_cases[] = { @@ -508,3 +506,7 @@ static struct kunit_suite property_entry_test_suite = { }; kunit_test_suite(property_entry_test_suite); + +MODULE_DESCRIPTION("Test module for the property entry API"); +MODULE_AUTHOR("Dmitry Torokhov <dtor@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/test/root-device-test.c 
b/drivers/base/test/root-device-test.c new file mode 100644 index 000000000000..9aea23c9123e --- /dev/null +++ b/drivers/base/test/root-device-test.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2023 Maxime Ripard <mripard@kernel.org> + +#include <kunit/resource.h> + +#include <linux/device.h> + +#define DEVICE_NAME "test" + +struct test_priv { + bool probe_done; + bool release_done; + wait_queue_head_t release_wq; + struct device *dev; +}; + +static int root_device_devm_init(struct kunit *test) +{ + struct test_priv *priv; + + priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); + init_waitqueue_head(&priv->release_wq); + + test->priv = priv; + + return 0; +} + +static void devm_device_action(void *ptr) +{ + struct test_priv *priv = ptr; + + priv->release_done = true; + wake_up_interruptible(&priv->release_wq); +} + +#define RELEASE_TIMEOUT_MS 100 + +/* + * Tests that a bus-less, non-probed device will run its device-managed + * actions when unregistered. + */ +static void root_device_devm_register_unregister_test(struct kunit *test) +{ + struct test_priv *priv = test->priv; + int ret; + + priv->dev = root_device_register(DEVICE_NAME); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->dev); + + ret = devm_add_action_or_reset(priv->dev, devm_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + root_device_unregister(priv->dev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); +} + +static void devm_put_device_action(void *ptr) +{ + struct test_priv *priv = ptr; + + put_device(priv->dev); + priv->release_done = true; + wake_up_interruptible(&priv->release_wq); +} + +/* + * Tests that a bus-less, non-probed device will run its device-managed + * actions when unregistered, even if someone still holds a reference to + * it. 
+ */ +static void root_device_devm_register_get_unregister_with_devm_test(struct kunit *test) +{ + struct test_priv *priv = test->priv; + int ret; + + priv->dev = root_device_register(DEVICE_NAME); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->dev); + + get_device(priv->dev); + + ret = devm_add_action_or_reset(priv->dev, devm_put_device_action, priv); + KUNIT_ASSERT_EQ(test, ret, 0); + + root_device_unregister(priv->dev); + + ret = wait_event_interruptible_timeout(priv->release_wq, priv->release_done, + msecs_to_jiffies(RELEASE_TIMEOUT_MS)); + KUNIT_EXPECT_GT(test, ret, 0); +} + +static struct kunit_case root_device_devm_tests[] = { + KUNIT_CASE(root_device_devm_register_unregister_test), + KUNIT_CASE(root_device_devm_register_get_unregister_with_devm_test), + {} +}; + +static struct kunit_suite root_device_devm_test_suite = { + .name = "root-device-devm", + .init = root_device_devm_init, + .test_cases = root_device_devm_tests, +}; + +kunit_test_suite(root_device_devm_test_suite); + +MODULE_DESCRIPTION("Test module for root devices"); +MODULE_AUTHOR("Maxime Ripard <mripard@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c index 3bb7beb127a9..3465800baa6c 100644 --- a/drivers/base/test/test_async_driver_probe.c +++ b/drivers/base/test/test_async_driver_probe.c @@ -84,7 +84,7 @@ test_platform_device_register_node(char *name, int id, int nid) pdev = platform_device_alloc(name, id); if (!pdev) - return NULL; + return ERR_PTR(-ENOMEM); if (nid != NUMA_NO_NODE) set_dev_node(&pdev->dev, nid); @@ -104,7 +104,7 @@ static int __init test_async_probe_init(void) struct platform_device **pdev = NULL; int async_id = 0, sync_id = 0; unsigned long long duration; - ktime_t calltime, delta; + ktime_t calltime; int err, nid, cpu; pr_info("registering first set of asynchronous devices...\n"); @@ -133,8 +133,7 @@ static int __init test_async_probe_init(void) goto err_unregister_async_devs; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); pr_info("registration took %lld msecs\n", duration); if (duration > TEST_PROBE_THRESHOLD) { pr_err("test failed: probe took too long\n"); @@ -146,7 +145,7 @@ static int __init test_async_probe_init(void) calltime = ktime_get(); for_each_online_cpu(cpu) { nid = cpu_to_node(cpu); - pdev = &sync_dev[sync_id]; + pdev = &async_dev[async_id]; *pdev = test_platform_device_register_node("test_async_driver", async_id, @@ -161,8 +160,7 @@ static int __init test_async_probe_init(void) async_id++; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); dev_info(&(*pdev)->dev, "registration took %lld msecs\n", duration); if (duration > TEST_PROBE_THRESHOLD) { @@ -197,8 +195,7 @@ static int __init test_async_probe_init(void) goto err_unregister_sync_devs; } - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), calltime); pr_info("registration took %lld msecs\n", duration); if (duration < TEST_PROBE_THRESHOLD) { dev_err(&(*pdev)->dev, @@ -223,8 +220,7 @@ static int __init test_async_probe_init(void) sync_id++; - delta = ktime_sub(ktime_get(), calltime); - duration = (unsigned long long) ktime_to_ms(delta); + duration = (unsigned long long)ktime_ms_delta(ktime_get(), 
calltime); dev_info(&(*pdev)->dev, "registration took %lld msecs\n", duration); if (duration < TEST_PROBE_THRESHOLD) { diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 4d254fcc93d1..c890e2a5b428 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -14,106 +14,173 @@ #include <linux/hardirq.h> #include <linux/topology.h> -#define define_id_show_func(name) \ +#define define_id_show_func(name, fmt) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ - return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \ + return sysfs_emit(buf, fmt "\n", topology_##name(dev->id)); \ } -#define define_siblings_show_map(name, mask) \ -static ssize_t name##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - return cpumap_print_to_pagebuf(false, buf, topology_##mask(dev->id));\ +#define define_siblings_read_func(name, mask) \ +static ssize_t name##_read(struct file *file, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, \ + loff_t off, size_t count) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + cpumask_var_t mask; \ + ssize_t n; \ + \ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \ + return -ENOMEM; \ + \ + cpumask_copy(mask, topology_##mask(dev->id)); \ + n = cpumap_print_bitmask_to_buf(buf, mask, off, count); \ + free_cpumask_var(mask); \ + \ + return n; \ +} \ + \ +static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, \ + loff_t off, size_t count) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + cpumask_var_t mask; \ + ssize_t n; \ + \ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) \ + return -ENOMEM; \ + \ + cpumask_copy(mask, topology_##mask(dev->id)); \ + n = cpumap_print_list_to_buf(buf, mask, off, count); \ + free_cpumask_var(mask); \ + \ + return n; \ } -#define define_siblings_show_list(name, mask) \ -static ssize_t name##_list_show(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - return cpumap_print_to_pagebuf(true, buf, topology_##mask(dev->id));\ -} - -#define define_siblings_show_func(name, mask) \ - define_siblings_show_map(name, mask); \ - define_siblings_show_list(name, mask) - -define_id_show_func(physical_package_id); +define_id_show_func(physical_package_id, "%d"); static DEVICE_ATTR_RO(physical_package_id); -define_id_show_func(die_id); +#ifdef TOPOLOGY_DIE_SYSFS +define_id_show_func(die_id, "%d"); static DEVICE_ATTR_RO(die_id); +#endif + +#ifdef TOPOLOGY_CLUSTER_SYSFS +define_id_show_func(cluster_id, "%d"); +static DEVICE_ATTR_RO(cluster_id); +#endif -define_id_show_func(core_id); +define_id_show_func(core_id, "%d"); static DEVICE_ATTR_RO(core_id); -define_siblings_show_func(thread_siblings, sibling_cpumask); -static DEVICE_ATTR_RO(thread_siblings); -static DEVICE_ATTR_RO(thread_siblings_list); +define_id_show_func(ppin, "0x%llx"); +static DEVICE_ATTR_ADMIN_RO(ppin); + +define_siblings_read_func(thread_siblings, sibling_cpumask); +static const BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES); -define_siblings_show_func(core_cpus, sibling_cpumask); -static DEVICE_ATTR_RO(core_cpus); -static DEVICE_ATTR_RO(core_cpus_list); +define_siblings_read_func(core_cpus, sibling_cpumask); +static const BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES); -define_siblings_show_func(core_siblings, core_cpumask); -static 
DEVICE_ATTR_RO(core_siblings); -static DEVICE_ATTR_RO(core_siblings_list); +define_siblings_read_func(core_siblings, core_cpumask); +static const BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES); -define_siblings_show_func(die_cpus, die_cpumask); -static DEVICE_ATTR_RO(die_cpus); -static DEVICE_ATTR_RO(die_cpus_list); +#ifdef TOPOLOGY_CLUSTER_SYSFS +define_siblings_read_func(cluster_cpus, cluster_cpumask); +static const BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES); +#endif + +#ifdef TOPOLOGY_DIE_SYSFS +define_siblings_read_func(die_cpus, die_cpumask); +static const BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES); +#endif -define_siblings_show_func(package_cpus, core_cpumask); -static DEVICE_ATTR_RO(package_cpus); -static DEVICE_ATTR_RO(package_cpus_list); +define_siblings_read_func(package_cpus, core_cpumask); +static const BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES); -#ifdef CONFIG_SCHED_BOOK -define_id_show_func(book_id); +#ifdef TOPOLOGY_BOOK_SYSFS +define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); -define_siblings_show_func(book_siblings, book_cpumask); -static DEVICE_ATTR_RO(book_siblings); -static DEVICE_ATTR_RO(book_siblings_list); +define_siblings_read_func(book_siblings, book_cpumask); +static const BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES); #endif -#ifdef CONFIG_SCHED_DRAWER -define_id_show_func(drawer_id); +#ifdef TOPOLOGY_DRAWER_SYSFS +define_id_show_func(drawer_id, "%d"); static DEVICE_ATTR_RO(drawer_id); -define_siblings_show_func(drawer_siblings, drawer_cpumask); -static DEVICE_ATTR_RO(drawer_siblings); -static DEVICE_ATTR_RO(drawer_siblings_list); +define_siblings_read_func(drawer_siblings, drawer_cpumask); +static const BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES); +static const BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES); #endif +static const struct bin_attribute *const bin_attrs[] = { + &bin_attr_core_cpus, + &bin_attr_core_cpus_list, + &bin_attr_thread_siblings, + &bin_attr_thread_siblings_list, + &bin_attr_core_siblings, + &bin_attr_core_siblings_list, +#ifdef TOPOLOGY_CLUSTER_SYSFS + &bin_attr_cluster_cpus, + &bin_attr_cluster_cpus_list, +#endif +#ifdef TOPOLOGY_DIE_SYSFS + &bin_attr_die_cpus, + &bin_attr_die_cpus_list, +#endif + &bin_attr_package_cpus, + &bin_attr_package_cpus_list, +#ifdef TOPOLOGY_BOOK_SYSFS + &bin_attr_book_siblings, + &bin_attr_book_siblings_list, +#endif +#ifdef TOPOLOGY_DRAWER_SYSFS + &bin_attr_drawer_siblings, + &bin_attr_drawer_siblings_list, +#endif + NULL +}; + static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, +#ifdef TOPOLOGY_DIE_SYSFS &dev_attr_die_id.attr, +#endif +#ifdef TOPOLOGY_CLUSTER_SYSFS + &dev_attr_cluster_id.attr, +#endif &dev_attr_core_id.attr, - &dev_attr_thread_siblings.attr, - &dev_attr_thread_siblings_list.attr, - &dev_attr_core_cpus.attr, - &dev_attr_core_cpus_list.attr, - &dev_attr_core_siblings.attr, - &dev_attr_core_siblings_list.attr, - &dev_attr_die_cpus.attr, - &dev_attr_die_cpus_list.attr, - &dev_attr_package_cpus.attr, - &dev_attr_package_cpus_list.attr, -#ifdef CONFIG_SCHED_BOOK +#ifdef TOPOLOGY_BOOK_SYSFS &dev_attr_book_id.attr, - &dev_attr_book_siblings.attr, - 
&dev_attr_book_siblings_list.attr, #endif -#ifdef CONFIG_SCHED_DRAWER +#ifdef TOPOLOGY_DRAWER_SYSFS &dev_attr_drawer_id.attr, - &dev_attr_drawer_siblings.attr, - &dev_attr_drawer_siblings_list.attr, #endif + &dev_attr_ppin.attr, NULL }; +static umode_t topology_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + if (attr == &dev_attr_ppin.attr && !topology_ppin(kobj_to_dev(kobj)->id)) + return 0; + + return attr->mode; +} + static const struct attribute_group topology_attr_group = { .attrs = default_attrs, + .bin_attrs = bin_attrs, + .is_visible = topology_is_visible, .name = "topology" }; @@ -141,3 +208,55 @@ static int __init topology_sysfs_init(void) } device_initcall(topology_sysfs_init); + +DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale); + +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + +static ssize_t cpu_capacity_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + + return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); +} + +static DEVICE_ATTR_RO(cpu_capacity); + +static int cpu_capacity_sysctl_add(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + return -ENOENT; + + device_create_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int cpu_capacity_sysctl_remove(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + return -ENOENT; + + device_remove_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int register_cpu_capacity_sysctl(void) +{ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity", + cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove); + + return 0; +} +subsys_initcall(register_cpu_capacity_sysctl); diff --git a/drivers/base/trace.h b/drivers/base/trace.h index 3192e18f877e..3b83b13a57ff 100644 --- a/drivers/base/trace.h +++ b/drivers/base/trace.h @@ -24,18 +24,18 @@ DECLARE_EVENT_CLASS(devres, __field(struct device *, dev) __field(const char *, op) __field(void *, node) - __field(const char *, name) + __string(name, name) __field(size_t, size) ), TP_fast_assign( - __assign_str(devname, dev_name(dev)); + __assign_str(devname); __entry->op = op; __entry->node = node; - __entry->name = name; + __assign_str(name); __entry->size = size; ), TP_printk("%s %3s %p %s (%zu bytes)", __get_str(devname), - __entry->op, __entry->node, __entry->name, __entry->size) + __entry->op, __entry->node, __get_str(name), __entry->size) ); DEFINE_EVENT(devres, devres_log, diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c index ccc86206e508..09ee2a1e35bb 100644 --- a/drivers/base/transport_class.c +++ b/drivers/base/transport_class.c @@ -155,12 +155,27 @@ static int transport_add_class_device(struct attribute_container *cont, struct device *dev, struct device *classdev) { + struct transport_class *tclass = class_to_transport_class(cont->class); int error = attribute_container_add_class_device(classdev); struct transport_container *tcont = attribute_container_to_transport_container(cont); - if (!error && tcont->statistics) + if (error) + goto err_remove; + + if (tcont->statistics) { error = sysfs_create_group(&classdev->kobj, tcont->statistics); + if (error) + goto err_del; + } + + return 0; + +err_del: + attribute_container_class_device_del(classdev); +err_remove: + if (tclass->remove) + tclass->remove(tcont, dev, 
classdev); return error; }
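
A note on the software node API change above: software_node_register_nodes() and software_node_unregister_nodes() are removed, and registering a pre-ordered set of nodes now goes through the node-group interface (whose signature becomes const struct software_node * const *), which is exactly the conversion done in pe_test_reference(). The following is a minimal migration sketch, assuming placeholder node names and a parent/child pair; it is modelled on that test conversion and is not copied from any file in this series.

#include <linux/property.h>

static const struct software_node example_parent = { .name = "parent" };

static const struct software_node example_child = {
	.name	= "child",
	.parent	= &example_parent,	/* parents must come before children in the group */
};

/* NULL-terminated array of node pointers, replacing the old zero-terminated array of nodes */
static const struct software_node *example_group[] = {
	&example_parent,
	&example_child,
	NULL
};

static int example_nodes_init(void)
{
	/* registers the nodes in array order and unwinds the whole group on failure */
	return software_node_register_node_group(example_group);
}

static void example_nodes_exit(void)
{
	/* see the ordering caveats in the kerneldoc retained above */
	software_node_unregister_node_group(example_group);
}

The practical difference is that a group stores pointers, so individual nodes can be defined in separate places and still be registered together, while the removed helpers required one contiguous array of struct software_node.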

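The syscore rework deserves a similar illustration. register_syscore()/unregister_syscore() now take a struct syscore that carries an ops pointer plus a data cookie, and each callback receives that cookie, as syscore_suspend(), syscore_resume() and syscore_shutdown() show. The structure and callback definitions live in a header that is not part of this diff, so the sketch below infers the field names (node, ops, data) and the callback signatures from the call sites; the ops type name, the header path and all example_* identifiers are assumptions.

#include <linux/init.h>
#include <linux/printk.h>
#include <linux/syscore_ops.h>
#include <linux/types.h>

struct example_ctrl {
	u32 saved_cfg;
};

static int example_ctrl_suspend(void *data)
{
	struct example_ctrl *ctrl = data;

	ctrl->saved_cfg = 0;	/* latch hardware state here */
	return 0;		/* non-zero aborts syscore suspend and resumes the entries already suspended */
}

static void example_ctrl_resume(void *data)
{
	struct example_ctrl *ctrl = data;

	pr_debug("restoring cfg %#x\n", ctrl->saved_cfg);	/* write it back to the hardware here */
}

static const struct syscore_ops example_ctrl_ops = {	/* ops type name assumed, not shown in this diff */
	.suspend = example_ctrl_suspend,
	.resume	 = example_ctrl_resume,
};

static struct example_ctrl example_ctrl;

static struct syscore example_ctrl_syscore = {
	.ops	= &example_ctrl_ops,
	.data	= &example_ctrl,
};

static int __init example_ctrl_init(void)
{
	/* suspend/resume callbacks run late, with interrupts disabled, so they must not sleep */
	register_syscore(&example_ctrl_syscore);
	return 0;
}
core_initcall(example_ctrl_init);

Presumably the point of the data cookie is that one driver can now register several independent syscore entries without keeping per-instance state in globals, which the old argument-less register_syscore_ops() callbacks forced.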