Diffstat (limited to 'arch/x86/kernel/cpu/resctrl')
-rw-r--r--  arch/x86/kernel/cpu/resctrl/Makefile                |    7
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c                  |  960
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c           |  507
-rw-r--r--  arch/x86/kernel/cpu/resctrl/internal.h              |  676
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c               |  863
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock.c           | 1193
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h (renamed from arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h) |   10
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c              | 3178
8 files changed, 1228 insertions(+), 6166 deletions(-)
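The refactor below is dominated by one structural idea: struct rdt_resource keeps only what the filesystem half of resctrl needs, while the x86-specific state moves into a struct rdt_hw_resource that embeds it as r_resctrl. A minimal sketch of that embedding, assuming heavily trimmed field sets (the real structures in <linux/resctrl.h> and internal.h carry many more members than shown here):

	#include <linux/container_of.h>
	#include <linux/types.h>

	struct rdt_resource {		/* generic half, walked by fs code */
		int	rid;
		bool	alloc_capable;
	};

	struct rdt_hw_resource {	/* x86-private wrapper */
		struct rdt_resource	r_resctrl;
		u32			num_closid;
		unsigned int		msr_base;
	};

	/* Recover the x86 container from a generic resource pointer. */
	static inline struct rdt_hw_resource *
	resctrl_to_arch_res(struct rdt_resource *r)
	{
		return container_of(r, struct rdt_hw_resource, r_resctrl);
	}

Because the generic struct is embedded rather than pointed to, the conversion is a compile-time offset with no extra allocation or indirection, which is why the diff can use resctrl_to_arch_res() freely in hot paths.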
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 4a06c37b9cf1..d8a04b195da2 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_X86_CPU_RESCTRL)	+= core.o rdtgroup.o monitor.o
-obj-$(CONFIG_X86_CPU_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
+obj-$(CONFIG_X86_CPU_RESCTRL)		+= core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL)		+= ctrlmondata.o
+obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK)	+= pseudo_lock.o
+
+# To allow define_trace.h's recursive include:
+CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 23001ae03e82..3792ab4819dc 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -16,17 +16,25 @@
 #define pr_fmt(fmt)	"resctrl: " fmt
 
+#include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/err.h>
-#include <linux/cacheinfo.h>
 #include <linux/cpuhotplug.h>
 
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
 #include <asm/resctrl.h>
 #include "internal.h"
 
-/* Mutex to protect rdtgroup access. */
-DEFINE_MUTEX(rdtgroup_mutex);
+/*
+ * rdt_domain structures are kfree()d when their last CPU goes offline,
+ * and allocated when the first CPU in a new domain comes online.
+ * The rdt_resource's domain list is updated when this happens. Readers of
+ * the domain list must either take cpus_read_lock(), or rely on an RCU
+ * read-side critical section, to avoid observing concurrent modification.
+ * All writers take this mutex:
+ */
+static DEFINE_MUTEX(domain_list_lock);
 
 /*
  * The cached resctrl_pqr_state is strictly per CPU and can never be
@@ -37,146 +45,77 @@ DEFINE_MUTEX(rdtgroup_mutex);
 DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
 
 /*
- * Used to store the max resource name width and max resource data width
- * to display the schemata in a tabular format
- */
-int max_name_width, max_data_width;
-
-/*
  * Global boolean for rdt_alloc which is true if any
  * resource allocation is enabled.
 */
bool rdt_alloc_capable;
 
-static void
-mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
-		struct rdt_resource *r);
-static void
-cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
-static void
-mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
-	      struct rdt_resource *r);
+static void mba_wrmsr_intel(struct msr_param *m);
+static void cat_wrmsr(struct msr_param *m);
+static void mba_wrmsr_amd(struct msr_param *m);
 
-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
+#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
+#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
 
-struct rdt_resource rdt_resources_all[] = {
+struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
 	[RDT_RESOURCE_L3] =
 	{
-		.rid			= RDT_RESOURCE_L3,
-		.name			= "L3",
-		.domains		= domain_init(RDT_RESOURCE_L3),
-		.msr_base		= MSR_IA32_L3_CBM_BASE,
-		.msr_update		= cat_wrmsr,
-		.cache_level		= 3,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 1,
-			.cbm_idx_offset	= 0,
+		.r_resctrl = {
+			.name		= "L3",
+			.ctrl_scope	= RESCTRL_L3_CACHE,
+			.mon_scope	= RESCTRL_L3_CACHE,
+			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_L3),
+			.mon_domains	= mon_domain_init(RDT_RESOURCE_L3),
+			.schema_fmt	= RESCTRL_SCHEMA_BITMAP,
 		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
-	},
-	[RDT_RESOURCE_L3DATA] =
-	{
-		.rid			= RDT_RESOURCE_L3DATA,
-		.name			= "L3DATA",
-		.domains		= domain_init(RDT_RESOURCE_L3DATA),
-		.msr_base		= MSR_IA32_L3_CBM_BASE,
-		.msr_update		= cat_wrmsr,
-		.cache_level		= 3,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 2,
-			.cbm_idx_offset	= 0,
-		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
-	},
-	[RDT_RESOURCE_L3CODE] =
-	{
-		.rid			= RDT_RESOURCE_L3CODE,
-		.name			= "L3CODE",
-		.domains		= domain_init(RDT_RESOURCE_L3CODE),
 		.msr_base		= MSR_IA32_L3_CBM_BASE,
 		.msr_update		= cat_wrmsr,
-		.cache_level		= 3,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 2,
-			.cbm_idx_offset	= 1,
-		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
 	},
 	[RDT_RESOURCE_L2] =
 	{
-		.rid			= RDT_RESOURCE_L2,
-		.name			= "L2",
-		.domains		= domain_init(RDT_RESOURCE_L2),
-		.msr_base		= MSR_IA32_L2_CBM_BASE,
-		.msr_update		= cat_wrmsr,
-		.cache_level		= 2,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 1,
-			.cbm_idx_offset	= 0,
+		.r_resctrl = {
+			.name		= "L2",
+			.ctrl_scope	= RESCTRL_L2_CACHE,
+			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_L2),
+			.schema_fmt	= RESCTRL_SCHEMA_BITMAP,
 		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
-	},
-	[RDT_RESOURCE_L2DATA] =
-	{
-		.rid			= RDT_RESOURCE_L2DATA,
-		.name			= "L2DATA",
-		.domains		= domain_init(RDT_RESOURCE_L2DATA),
 		.msr_base		= MSR_IA32_L2_CBM_BASE,
 		.msr_update		= cat_wrmsr,
-		.cache_level		= 2,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 2,
-			.cbm_idx_offset	= 0,
-		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
 	},
-	[RDT_RESOURCE_L2CODE] =
+	[RDT_RESOURCE_MBA] =
 	{
-		.rid			= RDT_RESOURCE_L2CODE,
-		.name			= "L2CODE",
-		.domains		= domain_init(RDT_RESOURCE_L2CODE),
-		.msr_base		= MSR_IA32_L2_CBM_BASE,
-		.msr_update		= cat_wrmsr,
-		.cache_level		= 2,
-		.cache = {
-			.min_cbm_bits	= 1,
-			.cbm_idx_mult	= 2,
-			.cbm_idx_offset	= 1,
+		.r_resctrl = {
+			.name		= "MB",
+			.ctrl_scope	= RESCTRL_L3_CACHE,
+			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_MBA),
+			.schema_fmt	= RESCTRL_SCHEMA_RANGE,
 		},
-		.parse_ctrlval		= parse_cbm,
-		.format_str		= "%d=%0*x",
-		.fflags			= RFTYPE_RES_CACHE,
 	},
-	[RDT_RESOURCE_MBA] =
+	[RDT_RESOURCE_SMBA] =
 	{
-		.rid			= RDT_RESOURCE_MBA,
-		.name			= "MB",
-		.domains		= domain_init(RDT_RESOURCE_MBA),
-		.cache_level		= 3,
-		.parse_ctrlval		= parse_bw,
-		.format_str		= "%d=%*u",
-		.fflags			= RFTYPE_RES_MB,
+		.r_resctrl = {
+			.name		= "SMBA",
+			.ctrl_scope	= RESCTRL_L3_CACHE,
+			.ctrl_domains	= ctrl_domain_init(RDT_RESOURCE_SMBA),
+			.schema_fmt	= RESCTRL_SCHEMA_RANGE,
+		},
 	},
 };
 
-static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+	return r->mon.num_rmid;
+}
+
+struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
 {
-	return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
+	if (l >= RDT_NUM_RESOURCES)
+		return NULL;
+
+	return &rdt_resources_all[l].r_resctrl;
 }
 
 /*
@@ -199,37 +138,29 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
  */
static inline void cache_alloc_hsw_probe(void)
 {
-	struct rdt_resource *r  = &rdt_resources_all[RDT_RESOURCE_L3];
-	u32 l, h, max_cbm = BIT_MASK(20) - 1;
+	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
+	struct rdt_resource *r = &hw_res->r_resctrl;
+	u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
 
-	if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
+	if (wrmsrq_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
 		return;
 
-	rdmsr(MSR_IA32_L3_CBM_BASE, l, h);
+	rdmsrq(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
 
 	/* If all the bits were set in MSR, return success */
-	if (l != max_cbm)
+	if (l3_cbm_0 != max_cbm)
 		return;
 
-	r->num_closid = 4;
-	r->default_ctrl = max_cbm;
+	hw_res->num_closid = 4;
 	r->cache.cbm_len = 20;
 	r->cache.shareable_bits = 0xc0000;
 	r->cache.min_cbm_bits = 2;
+	r->cache.arch_has_sparse_bitmasks = false;
 	r->alloc_capable = true;
-	r->alloc_enabled = true;
 
 	rdt_alloc_capable = true;
 }
 
-bool is_mba_sc(struct rdt_resource *r)
-{
-	if (!r)
-		return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
-
-	return r->membw.mba_sc;
-}
-
 /*
  * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
  * exposed to user interface and the h/w understandable delay values.
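The domain_list_lock comment introduced above fixes the locking model for the per-resource domain lists: writers serialize on the mutex, while readers must hold cpus_read_lock() or be inside an RCU read-side critical section. A sketch of the reader side, matching the lockdep_assert_cpus_held() plus list_for_each_entry() pattern that appears later in this diff (walk_ctrl_domains() itself is a hypothetical name, not a function in the patch):

	static void walk_ctrl_domains(struct rdt_resource *r)
	{
		struct rdt_ctrl_domain *d;

		/* Writers take domain_list_lock; readers only need cpuhp held. */
		lockdep_assert_cpus_held();

		list_for_each_entry(d, &r->ctrl_domains, hdr.list)
			pr_info("resctrl: %s domain %d\n", r->name, d->hdr.id);
	}

Holding the CPU hotplug lock is sufficient because domains are only created and destroyed from the hotplug callbacks, and list_add_tail_rcu()/list_del_rcu() plus synchronize_rcu() keep pure RCU readers safe as well.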
@@ -251,16 +182,17 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
 	return false;
 }
 
-static bool __get_mem_config_intel(struct rdt_resource *r)
+static __init bool __get_mem_config_intel(struct rdt_resource *r)
 {
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	union cpuid_0x10_3_eax eax;
 	union cpuid_0x10_x_edx edx;
 	u32 ebx, ecx, max_delay;
 
 	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
-	r->num_closid = edx.split.cos_max + 1;
+	hw_res->num_closid = edx.split.cos_max + 1;
 	max_delay = eax.split.max_delay + 1;
-	r->default_ctrl = MAX_MBA_BW;
+	r->membw.max_bw = MAX_MBA_BW;
 	r->membw.arch_needs_linear = true;
 	if (ecx & MBA_IS_LINEAR) {
 		r->membw.delay_linear = true;
@@ -271,29 +203,31 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 			return false;
 		r->membw.arch_needs_linear = false;
 	}
-	r->data_width = 3;
 
 	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
 		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
 	else
 		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
-	thread_throttle_mode_init();
 
 	r->alloc_capable = true;
-	r->alloc_enabled = true;
 
 	return true;
 }
 
-static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
+static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 {
-	union cpuid_0x10_3_eax eax;
-	union cpuid_0x10_x_edx edx;
-	u32 ebx, ecx;
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	u32 eax, ebx, ecx, edx, subleaf;
+
+	/*
+	 * Query CPUID_Fn80000020_EDX_x01 for MBA and
+	 * CPUID_Fn80000020_EDX_x02 for SMBA
+	 */
+	subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;
 
-	cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
-	r->num_closid = edx.split.cos_max + 1;
-	r->default_ctrl = MAX_MBA_BW_AMD;
+	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
+	hw_res->num_closid = edx + 1;
+	r->membw.max_bw = 1 << eax;
 
 	/* AMD does not use delay */
 	r->membw.delay_linear = false;
@@ -306,68 +240,63 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
 
 	r->membw.min_bw = 0;
 	r->membw.bw_gran = 1;
-	/* Max value is 2048, Data width should be 4 in decimal */
-	r->data_width = 4;
 
 	r->alloc_capable = true;
-	r->alloc_enabled = true;
 
 	return true;
 }
 
 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
 {
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	union cpuid_0x10_1_eax eax;
+	union cpuid_0x10_x_ecx ecx;
 	union cpuid_0x10_x_edx edx;
-	u32 ebx, ecx;
+	u32 ebx, default_ctrl;
 
-	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
-	r->num_closid = edx.split.cos_max + 1;
+	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
+	hw_res->num_closid = edx.split.cos_max + 1;
 	r->cache.cbm_len = eax.split.cbm_len + 1;
-	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
-	r->cache.shareable_bits = ebx & r->default_ctrl;
-	r->data_width = (r->cache.cbm_len + 3) / 4;
+	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
+	r->cache.shareable_bits = ebx & default_ctrl;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
 	r->alloc_capable = true;
-	r->alloc_enabled = true;
 }
 
-static void rdt_get_cdp_config(int level, int type)
+static void rdt_get_cdp_config(int level)
 {
-	struct rdt_resource *r_l = &rdt_resources_all[level];
-	struct rdt_resource *r = &rdt_resources_all[type];
-
-	r->num_closid = r_l->num_closid / 2;
-	r->cache.cbm_len = r_l->cache.cbm_len;
-	r->default_ctrl = r_l->default_ctrl;
-	r->cache.shareable_bits = r_l->cache.shareable_bits;
-	r->data_width = (r->cache.cbm_len + 3) / 4;
-	r->alloc_capable = true;
 	/*
 	 * By default, CDP is disabled. CDP can be enabled by mount parameter
 	 * "cdp" during resctrl file system mount time.
 	 */
-	r->alloc_enabled = false;
+	rdt_resources_all[level].cdp_enabled = false;
+	rdt_resources_all[level].r_resctrl.cdp_capable = true;
+}
+
+static void rdt_set_io_alloc_capable(struct rdt_resource *r)
+{
+	r->cache.io_alloc_capable = true;
 }
 
 static void rdt_get_cdp_l3_config(void)
 {
-	rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
-	rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+	rdt_get_cdp_config(RDT_RESOURCE_L3);
 }
 
 static void rdt_get_cdp_l2_config(void)
 {
-	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
-	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+	rdt_get_cdp_config(RDT_RESOURCE_L2);
 }
 
-static void
-mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+static void mba_wrmsr_amd(struct msr_param *m)
 {
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
 	unsigned int i;
 
 	for (i = m->low; i < m->high; i++)
-		wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
 }
 
 /*
@@ -375,375 +304,414 @@ mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
  * that can be written to QOS_MSRs.
  * There are currently no SKUs which support non linear delay values.
  */
-u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
 {
 	if (r->membw.delay_linear)
 		return MAX_MBA_BW - bw;
 
 	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
-	return r->default_ctrl;
+	return MAX_MBA_BW;
 }
 
-static void
-mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
-		struct rdt_resource *r)
+static void mba_wrmsr_intel(struct msr_param *m)
 {
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
 	unsigned int i;
 
 	/* Write the delay values for mba. */
 	for (i = m->low; i < m->high; i++)
-		wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
+		wrmsrq(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
 }
 
-static void
-cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+static void cat_wrmsr(struct msr_param *m)
 {
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
 	unsigned int i;
 
 	for (i = m->low; i < m->high; i++)
-		wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
+		wrmsrq(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
 }
 
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
 {
-	struct rdt_domain *d;
-
-	list_for_each_entry(d, &r->domains, list) {
-		/* Find the domain that contains this CPU */
-		if (cpumask_test_cpu(cpu, &d->cpu_mask))
-			return d;
-	}
-
-	return NULL;
+	return resctrl_to_arch_res(r)->num_closid;
 }
 
 void rdt_ctrl_update(void *arg)
 {
+	struct rdt_hw_resource *hw_res;
 	struct msr_param *m = arg;
-	struct rdt_resource *r = m->res;
-	int cpu = smp_processor_id();
-	struct rdt_domain *d;
-
-	d = get_domain_from_cpu(cpu, r);
-	if (d) {
-		r->msr_update(d, m, r);
-		return;
-	}
-	pr_warn_once("cpu %d not found in any domain for resource %s\n",
-		     cpu, r->name);
-}
-
-/*
- * rdt_find_domain - Find a domain in a resource that matches input resource id
- *
- * Search resource r's domain list to find the resource id. If the resource
- * id is found in a domain, return the domain. Otherwise, if requested by
- * caller, return the first domain whose id is bigger than the input id.
- * The domain list is sorted by id in ascending order.
- */
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
-				   struct list_head **pos)
-{
-	struct rdt_domain *d;
-	struct list_head *l;
-
-	if (id < 0)
-		return ERR_PTR(-ENODEV);
-
-	list_for_each(l, &r->domains) {
-		d = list_entry(l, struct rdt_domain, list);
-		/* When id is found, return its domain. */
-		if (id == d->id)
-			return d;
-		/* Stop searching when finding id's position in sorted list. */
-		if (id < d->id)
-			break;
-	}
-
-	if (pos)
-		*pos = l;
 
-	return NULL;
+	hw_res = resctrl_to_arch_res(m->res);
+	hw_res->msr_update(m);
 }
 
-void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
 {
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	int i;
 
 	/*
	 * Initialize the Control MSRs to having no control.
	 * For Cache Allocation: Set all bits in cbm
 	 * For Memory Allocation: Set b/w requested to 100%
-	 * and the bandwidth in MBps to U32_MAX
 	 */
-	for (i = 0; i < r->num_closid; i++, dc++, dm++) {
-		*dc = r->default_ctrl;
-		*dm = MBA_MAX_MBPS;
-	}
+	for (i = 0; i < hw_res->num_closid; i++, dc++)
+		*dc = resctrl_get_default_ctrl(r);
+}
+
+static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
+{
+	kfree(hw_dom->ctrl_val);
+	kfree(hw_dom);
+}
+
+static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
+{
+	int idx;
+
+	for_each_mbm_idx(idx)
+		kfree(hw_dom->arch_mbm_states[idx]);
+	kfree(hw_dom);
 }
 
-static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
 {
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 	struct msr_param m;
-	u32 *dc, *dm;
+	u32 *dc;
 
-	dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
+			   GFP_KERNEL);
 	if (!dc)
 		return -ENOMEM;
 
-	dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
-	if (!dm) {
-		kfree(dc);
-		return -ENOMEM;
-	}
-
-	d->ctrl_val = dc;
-	d->mbps_val = dm;
-	setup_default_ctrlval(r, dc, dm);
+	hw_dom->ctrl_val = dc;
+	setup_default_ctrlval(r, dc);
 
+	m.res = r;
+	m.dom = d;
 	m.low = 0;
-	m.high = r->num_closid;
-	r->msr_update(d, &m, r);
+	m.high = hw_res->num_closid;
+	hw_res->msr_update(&m);
+
 	return 0;
 }
 
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+/**
+ * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
+ * @num_rmid:	The size of the MBM counter array
+ * @hw_dom:	The domain that owns the allocated arrays
+ */
+static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
 {
-	size_t tsize;
-
-	if (is_llc_occupancy_enabled()) {
-		d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
-		if (!d->rmid_busy_llc)
-			return -ENOMEM;
-		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
+	size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]);
+	enum resctrl_event_id eventid;
+	int idx;
+
+	for_each_mbm_event_id(eventid) {
+		if (!resctrl_is_mon_event_enabled(eventid))
+			continue;
+		idx = MBM_STATE_IDX(eventid);
+		hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL);
+		if (!hw_dom->arch_mbm_states[idx])
+			goto cleanup;
 	}
-	if (is_mbm_total_enabled()) {
-		tsize = sizeof(*d->mbm_total);
-		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
-		if (!d->mbm_total) {
-			bitmap_free(d->rmid_busy_llc);
-			return -ENOMEM;
-		}
-	}
-	if (is_mbm_local_enabled()) {
-		tsize = sizeof(*d->mbm_local);
-		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
-		if (!d->mbm_local) {
-			bitmap_free(d->rmid_busy_llc);
-			kfree(d->mbm_total);
-			return -ENOMEM;
-		}
+
+	return 0;
+cleanup:
+	for_each_mbm_idx(idx) {
+		kfree(hw_dom->arch_mbm_states[idx]);
+		hw_dom->arch_mbm_states[idx] = NULL;
 	}
-	if (is_mbm_enabled()) {
-		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
-		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+
+	return -ENOMEM;
+}
+
+static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
+{
+	switch (scope) {
+	case RESCTRL_L2_CACHE:
+	case RESCTRL_L3_CACHE:
+		return get_cpu_cacheinfo_id(cpu, scope);
+	case RESCTRL_L3_NODE:
+		return cpu_to_node(cpu);
+	default:
+		break;
 	}
 
-	return 0;
+	return -EINVAL;
 }
 
-/*
- * domain_add_cpu - Add a cpu to a resource's domain list.
- *
- * If an existing domain in the resource r's domain list matches the cpu's
- * resource id, add the cpu in the domain.
- *
- * Otherwise, a new domain is allocated and inserted into the right position
- * in the domain list sorted by id in ascending order.
- *
- * The order in the domain list is visible to users when we print entries
- * in the schemata file and schemata input is validated to have the same order
- * as this list.
- */
-static void domain_add_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+	struct rdt_hw_ctrl_domain *hw_dom;
 	struct list_head *add_pos = NULL;
-	struct rdt_domain *d;
+	struct rdt_domain_hdr *hdr;
+	struct rdt_ctrl_domain *d;
+	int err;
 
-	d = rdt_find_domain(r, id, &add_pos);
-	if (IS_ERR(d)) {
-		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+	lockdep_assert_held(&domain_list_lock);
+
+	if (id < 0) {
+		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+			     cpu, r->ctrl_scope, r->name);
 		return;
 	}
 
-	if (d) {
-		cpumask_set_cpu(cpu, &d->cpu_mask);
+	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
+	if (hdr) {
+		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+			return;
+		d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+
+		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
 		if (r->cache.arch_has_per_cpu_cfg)
 			rdt_domain_reconfigure_cdp(r);
 		return;
 	}
 
-	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
-	if (!d)
+	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+	if (!hw_dom)
 		return;
 
-	d->id = id;
-	cpumask_set_cpu(cpu, &d->cpu_mask);
+	d = &hw_dom->d_resctrl;
+	d->hdr.id = id;
+	d->hdr.type = RESCTRL_CTRL_DOMAIN;
+	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
 
 	rdt_domain_reconfigure_cdp(r);
 
-	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
-		kfree(d);
+	if (domain_setup_ctrlval(r, d)) {
+		ctrl_domain_free(hw_dom);
 		return;
 	}
 
-	if (r->mon_capable && domain_setup_mon_state(r, d)) {
-		kfree(d);
+	list_add_tail_rcu(&d->hdr.list, add_pos);
+
+	err = resctrl_online_ctrl_domain(r, d);
+	if (err) {
+		list_del_rcu(&d->hdr.list);
+		synchronize_rcu();
+		ctrl_domain_free(hw_dom);
+	}
+}
+
+static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+{
+	int id = get_domain_id_from_scope(cpu, r->mon_scope);
+	struct list_head *add_pos = NULL;
+	struct rdt_hw_mon_domain *hw_dom;
+	struct rdt_domain_hdr *hdr;
+	struct rdt_mon_domain *d;
+	struct cacheinfo *ci;
+	int err;
+
+	lockdep_assert_held(&domain_list_lock);
+
+	if (id < 0) {
+		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+			     cpu, r->mon_scope, r->name);
 		return;
 	}
 
-	list_add_tail(&d->list, add_pos);
+	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
+	if (hdr) {
+		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+			return;
+		d = container_of(hdr, struct rdt_mon_domain, hdr);
 
-	/*
-	 * If resctrl is mounted, add
-	 * per domain monitor data directories.
-	 */
-	if (static_branch_unlikely(&rdt_mon_enable_key))
-		mkdir_mondata_subdir_allrdtgrp(r, d);
+		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+		/* Update the mbm_assign_mode state for the CPU if supported */
+		if (r->mon.mbm_cntr_assignable)
+			resctrl_arch_mbm_cntr_assign_set_one(r);
+		return;
+	}
+
+	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+	if (!hw_dom)
+		return;
+
+	d = &hw_dom->d_resctrl;
+	d->hdr.id = id;
+	d->hdr.type = RESCTRL_MON_DOMAIN;
+	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+	if (!ci) {
+		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
+		mon_domain_free(hw_dom);
+		return;
+	}
+	d->ci_id = ci->id;
+	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+
+	/* Update the mbm_assign_mode state for the CPU if supported */
+	if (r->mon.mbm_cntr_assignable)
+		resctrl_arch_mbm_cntr_assign_set_one(r);
+
+	arch_mon_domain_online(r, d);
+
+	if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) {
+		mon_domain_free(hw_dom);
+		return;
+	}
+
+	list_add_tail_rcu(&d->hdr.list, add_pos);
+
+	err = resctrl_online_mon_domain(r, d);
+	if (err) {
+		list_del_rcu(&d->hdr.list);
+		synchronize_rcu();
+		mon_domain_free(hw_dom);
+	}
 }
 
-static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
-	struct rdt_domain *d;
+	if (r->alloc_capable)
+		domain_add_cpu_ctrl(cpu, r);
+	if (r->mon_capable)
+		domain_add_cpu_mon(cpu, r);
+}
+
+static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
+{
+	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+	struct rdt_hw_ctrl_domain *hw_dom;
+	struct rdt_domain_hdr *hdr;
+	struct rdt_ctrl_domain *d;
+
+	lockdep_assert_held(&domain_list_lock);
 
-	d = rdt_find_domain(r, id, NULL);
-	if (IS_ERR_OR_NULL(d)) {
-		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+	if (id < 0) {
+		pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+			     cpu, r->ctrl_scope, r->name);
 		return;
 	}
 
-	cpumask_clear_cpu(cpu, &d->cpu_mask);
-	if (cpumask_empty(&d->cpu_mask)) {
-		/*
-		 * If resctrl is mounted, remove all the
-		 * per domain monitor data directories.
-		 */
-		if (static_branch_unlikely(&rdt_mon_enable_key))
-			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		list_del(&d->list);
-		if (r->mon_capable && is_mbm_enabled())
-			cancel_delayed_work(&d->mbm_over);
-		if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
-			/*
-			 * When a package is going down, forcefully
-			 * decrement rmid->ebusy. There is no way to know
-			 * that the L3 was flushed and hence may lead to
-			 * incorrect counts in rare scenarios, but leaving
-			 * the RMID as busy creates RMID leaks if the
-			 * package never comes back.
-			 */
-			__check_limbo(d, true);
-			cancel_delayed_work(&d->cqm_limbo);
-		}
+	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
+	if (!hdr) {
+		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
+			id, cpu, r->name);
+		return;
+	}
+
+	if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+		return;
+
+	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+	hw_dom = resctrl_to_arch_ctrl_dom(d);
+
+	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+	if (cpumask_empty(&d->hdr.cpu_mask)) {
+		resctrl_offline_ctrl_domain(r, d);
+		list_del_rcu(&d->hdr.list);
+		synchronize_rcu();
 
 		/*
-		 * rdt_domain "d" is going to be freed below, so clear
+		 * rdt_ctrl_domain "d" is going to be freed below, so clear
 		 * its pointer from pseudo_lock_region struct.
		 */
		if (d->plr)
			d->plr->d = NULL;
+		ctrl_domain_free(hw_dom);
 
-		kfree(d->ctrl_val);
-		kfree(d->mbps_val);
-		bitmap_free(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
-		kfree(d);
 		return;
 	}
+}
 
-	if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
-		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
-			cancel_delayed_work(&d->mbm_over);
-			mbm_setup_overflow_handler(d, 0);
-		}
-		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
-		    has_busy_rmid(r, d)) {
-			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0);
-		}
+static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
+{
+	int id = get_domain_id_from_scope(cpu, r->mon_scope);
+	struct rdt_hw_mon_domain *hw_dom;
+	struct rdt_domain_hdr *hdr;
+	struct rdt_mon_domain *d;
+
+	lockdep_assert_held(&domain_list_lock);
+
+	if (id < 0) {
+		pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+			     cpu, r->mon_scope, r->name);
+		return;
+	}
+
+	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
+	if (!hdr) {
+		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
+			id, cpu, r->name);
+		return;
+	}
+
+	if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+		return;
+
+	d = container_of(hdr, struct rdt_mon_domain, hdr);
+	hw_dom = resctrl_to_arch_mon_dom(d);
+
+	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+	if (cpumask_empty(&d->hdr.cpu_mask)) {
+		resctrl_offline_mon_domain(r, d);
+		list_del_rcu(&d->hdr.list);
+		synchronize_rcu();
+		mon_domain_free(hw_dom);
+
+		return;
 	}
 }
 
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+	if (r->alloc_capable)
+		domain_remove_cpu_ctrl(cpu, r);
+	if (r->mon_capable)
+		domain_remove_cpu_mon(cpu, r);
+}
+
 static void clear_closid_rmid(int cpu)
 {
 	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
 
-	state->default_closid = 0;
-	state->default_rmid = 0;
-	state->cur_closid = 0;
-	state->cur_rmid = 0;
-	wrmsr(IA32_PQR_ASSOC, 0, 0);
+	state->default_closid = RESCTRL_RESERVED_CLOSID;
+	state->default_rmid = RESCTRL_RESERVED_RMID;
+	state->cur_closid = RESCTRL_RESERVED_CLOSID;
+	state->cur_rmid = RESCTRL_RESERVED_RMID;
+	wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
+	      RESCTRL_RESERVED_CLOSID);
 }
 
-static int resctrl_online_cpu(unsigned int cpu)
+static int resctrl_arch_online_cpu(unsigned int cpu)
 {
 	struct rdt_resource *r;
 
-	mutex_lock(&rdtgroup_mutex);
+	mutex_lock(&domain_list_lock);
 	for_each_capable_rdt_resource(r)
 		domain_add_cpu(cpu, r);
-	/* The cpu is set in default rdtgroup after online. */
-	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+	mutex_unlock(&domain_list_lock);
+
 	clear_closid_rmid(cpu);
-	mutex_unlock(&rdtgroup_mutex);
+	resctrl_online_cpu(cpu);
 
 	return 0;
 }
 
-static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
-{
-	struct rdtgroup *cr;
-
-	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
-			break;
-		}
-	}
-}
-
-static int resctrl_offline_cpu(unsigned int cpu)
+static int resctrl_arch_offline_cpu(unsigned int cpu)
 {
-	struct rdtgroup *rdtgrp;
 	struct rdt_resource *r;
 
-	mutex_lock(&rdtgroup_mutex);
+	resctrl_offline_cpu(cpu);
+
+	mutex_lock(&domain_list_lock);
 	for_each_capable_rdt_resource(r)
 		domain_remove_cpu(cpu, r);
-	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
-		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
-			clear_childcpus(rdtgrp, cpu);
-			break;
-		}
-	}
+	mutex_unlock(&domain_list_lock);
+
 	clear_closid_rmid(cpu);
-	mutex_unlock(&rdtgroup_mutex);
 
 	return 0;
 }
 
-/*
- * Choose a width for the resource name and resource data based on the
- * resource that has widest name and cbm.
- */
-static __init void rdt_init_padding(void)
-{
-	struct rdt_resource *r;
-	int cl;
-
-	for_each_alloc_capable_rdt_resource(r) {
-		cl = strlen(r->name);
-		if (cl > max_name_width)
-			max_name_width = cl;
-
-		if (r->data_width > max_data_width)
-			max_data_width = r->data_width;
-	}
-}
-
 enum {
 	RDT_FLAG_CMT,
 	RDT_FLAG_MBM_TOTAL,
@@ -753,6 +721,10 @@ enum {
 	RDT_FLAG_L2_CAT,
 	RDT_FLAG_L2_CDP,
 	RDT_FLAG_MBA,
+	RDT_FLAG_SMBA,
+	RDT_FLAG_BMEC,
+	RDT_FLAG_ABMC,
+	RDT_FLAG_SDCIAE,
 };
 
 #define RDT_OPT(idx, n, f)	\
@@ -767,7 +739,7 @@ struct rdt_options {
 	bool force_off, force_on;
 };
 
-static struct rdt_options rdt_options[] __initdata = {
+static struct rdt_options rdt_options[] __ro_after_init = {
 	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
 	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
 	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
@@ -776,6 +748,10 @@ static struct rdt_options rdt_options[] __initdata = {
 	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
 	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
 	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
+	RDT_OPT(RDT_FLAG_SMBA,	    "smba",	X86_FEATURE_SMBA),
+	RDT_OPT(RDT_FLAG_BMEC,	    "bmec",	X86_FEATURE_BMEC),
+	RDT_OPT(RDT_FLAG_ABMC,	    "abmc",	X86_FEATURE_ABMC),
+	RDT_OPT(RDT_FLAG_SDCIAE,    "sdciae",	X86_FEATURE_SDCIAE),
 };
 
 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -805,7 +781,7 @@ static int __init set_rdt_options(char *str)
 }
 __setup("rdt", set_rdt_options);
 
-static bool __init rdt_cpu_has(int flag)
+bool rdt_cpu_has(int flag)
 {
 	bool ret = boot_cpu_has(flag);
 	struct rdt_options *o;
@@ -825,21 +801,52 @@ static bool __init rdt_cpu_has(int flag)
 	return ret;
 }
 
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
+{
+	if (!rdt_cpu_has(X86_FEATURE_BMEC))
+		return false;
+
+	switch (evt) {
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
+	default:
+		return false;
+	}
+}
+
 static __init bool get_mem_config(void)
 {
+	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
+
 	if (!rdt_cpu_has(X86_FEATURE_MBA))
 		return false;
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]);
+		return __get_mem_config_intel(&hw_res->r_resctrl);
 	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]);
+		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
+
+	return false;
+}
+
+static __init bool get_slow_mem_config(void)
+{
+	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];
+
+	if (!rdt_cpu_has(X86_FEATURE_SMBA))
+		return false;
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
 
 	return false;
 }
 
 static __init bool get_rdt_alloc_resources(void)
 {
+	struct rdt_resource *r;
 	bool ret = false;
 
 	if (rdt_alloc_capable)
@@ -849,14 +856,18 @@ static __init bool get_rdt_alloc_resources(void)
 		return false;
 
 	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
-		rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+		rdt_get_cache_alloc_cfg(1, r);
 		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
 			rdt_get_cdp_l3_config();
+		if (rdt_cpu_has(X86_FEATURE_SDCIAE))
+			rdt_set_io_alloc_capable(r);
 		ret = true;
 	}
 	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
 		/* CPUID 0x10.2 fields are same format at 0x10.1 */
-		rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
+		rdt_get_cache_alloc_cfg(2, r);
 		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
 			rdt_get_cdp_l2_config();
 		ret = true;
@@ -865,38 +876,52 @@ static __init bool get_rdt_alloc_resources(void)
 	if (get_mem_config())
 		ret = true;
 
+	if (get_slow_mem_config())
+		ret = true;
+
 	return ret;
 }
 
 static __init bool get_rdt_mon_resources(void)
 {
-	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
-		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
-	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
-		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
-	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
-		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
-
-	if (!rdt_mon_features)
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	bool ret = false;
+
+	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
+		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID);
+		ret = true;
+	}
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
+		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
+		ret = true;
+	}
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
+		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
+		ret = true;
+	}
+	if (rdt_cpu_has(X86_FEATURE_ABMC))
+		ret = true;
+
+	if (!ret)
 		return false;
 
-	return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
+	return !rdt_get_mon_l3_config(r);
 }
 
 static __init void __check_quirks_intel(void)
 {
-	switch (boot_cpu_data.x86_model) {
-	case INTEL_FAM6_HASWELL_X:
+	switch (boot_cpu_data.x86_vfm) {
+	case INTEL_HASWELL_X:
 		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
 			cache_alloc_hsw_probe();
 		break;
-	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_SKYLAKE_X:
 		if (boot_cpu_data.x86_stepping <= 4)
 			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
 		else
 			set_rdt_options("!l3cat");
 		fallthrough;
-	case INTEL_FAM6_BROADWELL_X:
+	case INTEL_BROADWELL_X:
 		intel_rdt_mbm_apply_quirk();
 		break;
 	}
@@ -918,42 +943,42 @@ static __init bool get_rdt_resources(void)
 
 static __init void rdt_init_res_defs_intel(void)
 {
+	struct rdt_hw_resource *hw_res;
 	struct rdt_resource *r;
 
 	for_each_rdt_resource(r) {
+		hw_res = resctrl_to_arch_res(r);
+
 		if (r->rid == RDT_RESOURCE_L3 ||
-		    r->rid == RDT_RESOURCE_L3DATA ||
-		    r->rid == RDT_RESOURCE_L3CODE ||
-		    r->rid == RDT_RESOURCE_L2 ||
-		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE) {
-			r->cache.arch_has_sparse_bitmaps = false;
-			r->cache.arch_has_empty_bitmaps = false;
+		    r->rid == RDT_RESOURCE_L2) {
 			r->cache.arch_has_per_cpu_cfg = false;
+			r->cache.min_cbm_bits = 1;
 		} else if (r->rid == RDT_RESOURCE_MBA) {
-			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
-			r->msr_update = mba_wrmsr_intel;
+			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
+			hw_res->msr_update = mba_wrmsr_intel;
 		}
 	}
 }
 
 static __init void rdt_init_res_defs_amd(void)
 {
+	struct rdt_hw_resource *hw_res;
 	struct rdt_resource *r;
 
	for_each_rdt_resource(r) {
+		hw_res = resctrl_to_arch_res(r);
+
 		if (r->rid == RDT_RESOURCE_L3 ||
-		    r->rid == RDT_RESOURCE_L3DATA ||
-		    r->rid == RDT_RESOURCE_L3CODE ||
-		    r->rid == RDT_RESOURCE_L2 ||
-		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE) {
-			r->cache.arch_has_sparse_bitmaps = true;
-			r->cache.arch_has_empty_bitmaps = true;
+		    r->rid == RDT_RESOURCE_L2) {
+			r->cache.arch_has_sparse_bitmasks = true;
 			r->cache.arch_has_per_cpu_cfg = true;
+			r->cache.min_cbm_bits = 0;
 		} else if (r->rid == RDT_RESOURCE_MBA) {
-			r->msr_base = MSR_IA32_MBA_BW_BASE;
-			r->msr_update = mba_wrmsr_amd;
+			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
+			hw_res->msr_update = mba_wrmsr_amd;
+		} else if (r->rid == RDT_RESOURCE_SMBA) {
+			hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
+			hw_res->msr_update = mba_wrmsr_amd;
 		}
 	}
 }
@@ -971,7 +996,7 @@ static enum cpuhp_state rdt_online;
 /* Runs once on the BSP during boot. */
 void resctrl_cpu_detect(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+	if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) {
 		c->x86_cache_max_rmid  = -1;
 		c->x86_cache_occ_scale = -1;
 		c->x86_cache_mbm_width_offset = -1;
@@ -983,7 +1008,8 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
 	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
-	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) ||
+	    cpu_has(c, X86_FEATURE_ABMC)) {
 		u32 eax, ebx, ecx, edx;
 
 		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
@@ -998,10 +1024,14 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
 	}
 }
 
-static int __init resctrl_late_init(void)
+static int __init resctrl_arch_late_init(void)
 {
 	struct rdt_resource *r;
-	int state, ret;
+	int state, ret, i;
+
+	/* for_each_rdt_resource() requires all rid to be initialised. */
+	for (i = 0; i < RDT_NUM_RESOURCES; i++)
+		rdt_resources_all[i].r_resctrl.rid = i;
 
 	/*
 	 * Initialize functions(or definitions) that are different
@@ -1014,15 +1044,14 @@ static int __init resctrl_late_init(void)
 	if (!get_rdt_resources())
 		return -ENODEV;
 
-	rdt_init_padding();
-
 	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				  "x86/resctrl/cat:online:",
-				  resctrl_online_cpu, resctrl_offline_cpu);
+				  resctrl_arch_online_cpu,
+				  resctrl_arch_offline_cpu);
 	if (state < 0)
 		return state;
 
-	ret = rdtgroup_init();
+	ret = resctrl_init();
 	if (ret) {
 		cpuhp_remove_state(state);
 		return ret;
@@ -1038,12 +1067,13 @@ static int __init resctrl_late_init(void)
 	return 0;
 }
 
-late_initcall(resctrl_late_init);
+late_initcall(resctrl_arch_late_init);
 
-static void __exit resctrl_exit(void)
+static void __exit resctrl_arch_exit(void)
 {
 	cpuhp_remove_state(rdt_online);
-	rdtgroup_exit();
+
+	resctrl_exit();
 }
 
-__exitcall(resctrl_exit);
+__exitcall(resctrl_arch_exit);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index c877642e8a14..b20e705606b8 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -16,475 +16,118 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/cpu.h>
-#include <linux/kernfs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include "internal.h"
-
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and max bandwidth values specified by the
- * hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
-{
-	unsigned long bw;
-	int ret;
-
-	/*
-	 * Only linear delay values is supported for current Intel SKUs.
-	 */
-	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
-		rdt_last_cmd_puts("No support for non-linear MB domains\n");
-		return false;
-	}
-
-	ret = kstrtoul(buf, 10, &bw);
-	if (ret) {
-		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
-		return false;
-	}
-
-	if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
-	    !is_mba_sc(r)) {
-		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
-				    r->membw.min_bw, r->default_ctrl);
-		return false;
-	}
-
-	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
-	return true;
-}
-
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
-	     struct rdt_domain *d)
-{
-	unsigned long bw_val;
-
-	if (d->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	if (!bw_validate(data->buf, &bw_val, r))
-		return -EINVAL;
-	d->new_ctrl = bw_val;
-	d->have_new_ctrl = true;
-
-	return 0;
-}
-
-/*
- * Check whether a cache bit mask is valid.
- * For Intel the SDM says:
- *	Please note that all (and only) contiguous '1' combinations
- *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
- * Additionally Haswell requires at least two bits set.
- * AMD allows non-contiguous bitmasks.
- */
-static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
-{
-	unsigned long first_bit, zero_bit, val;
-	unsigned int cbm_len = r->cache.cbm_len;
-	int ret;
-
-	ret = kstrtoul(buf, 16, &val);
-	if (ret) {
-		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
-		return false;
-	}
-
-	if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
-	    val > r->default_ctrl) {
-		rdt_last_cmd_puts("Mask out of range\n");
-		return false;
-	}
-
-	first_bit = find_first_bit(&val, cbm_len);
-	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
-
-	/* Are non-contiguous bitmaps allowed? */
-	if (!r->cache.arch_has_sparse_bitmaps &&
-	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
-		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
-		return false;
-	}
-
-	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
-		rdt_last_cmd_printf("Need at least %d bits in the mask\n",
-				    r->cache.min_cbm_bits);
-		return false;
-	}
-
-	*data = val;
-	return true;
-}
+#include "internal.h"
 
-/*
- * Read one cache bit mask (hex). Check that it is valid for the current
- * resource type.
- */
-int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
-	      struct rdt_domain *d)
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
+			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
 {
-	struct rdtgroup *rdtgrp = data->rdtgrp;
-	u32 cbm_val;
-
-	if (d->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	/*
-	 * Cannot set up more than one pseudo-locked region in a cache
-	 * hierarchy.
-	 */
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
-	    rdtgroup_pseudo_locked_in_hierarchy(d)) {
-		rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
-		return -EINVAL;
-	}
-
-	if (!cbm_validate(data->buf, &cbm_val, r))
-		return -EINVAL;
-
-	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
-	     rdtgrp->mode == RDT_MODE_SHAREABLE) &&
-	    rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
-		rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
-		return -EINVAL;
-	}
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	u32 idx = resctrl_get_config_index(closid, t);
+	struct msr_param msr_param;
 
-	/*
-	 * The CBM may not overlap with the CBM of another closid if
-	 * either is exclusive.
-	 */
-	if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
-		rdt_last_cmd_puts("Overlaps with exclusive group\n");
+	if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
 		return -EINVAL;
-	}
 
-	if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
-		if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
-		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			rdt_last_cmd_puts("Overlaps with other group\n");
-			return -EINVAL;
-		}
-	}
+	hw_dom->ctrl_val[idx] = cfg_val;
 
-	d->new_ctrl = cbm_val;
-	d->have_new_ctrl = true;
+	msr_param.res = r;
+	msr_param.dom = d;
+	msr_param.low = idx;
+	msr_param.high = idx + 1;
+	hw_res->msr_update(&msr_param);
 
 	return 0;
 }
 
-/*
- * For each domain in this resource we expect to find a series of:
- *	id=mask
- * separated by ";". The "id" is in decimal, and must match one of
- * the "id"s for this resource.
- */
-static int parse_line(char *line, struct rdt_resource *r,
-		      struct rdtgroup *rdtgrp)
-{
-	struct rdt_parse_data data;
-	char *dom = NULL, *id;
-	struct rdt_domain *d;
-	unsigned long dom_id;
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
-	    r->rid == RDT_RESOURCE_MBA) {
-		rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
-		return -EINVAL;
-	}
-
-next:
-	if (!line || line[0] == '\0')
-		return 0;
-	dom = strsep(&line, ";");
-	id = strsep(&dom, "=");
-	if (!dom || kstrtoul(id, 10, &dom_id)) {
-		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
-		return -EINVAL;
-	}
-	dom = strim(dom);
-	list_for_each_entry(d, &r->domains, list) {
-		if (d->id == dom_id) {
-			data.buf = dom;
-			data.rdtgrp = rdtgrp;
-			if (r->parse_ctrlval(&data, r, d))
-				return -EINVAL;
-			if (rdtgrp->mode ==  RDT_MODE_PSEUDO_LOCKSETUP) {
-				/*
-				 * In pseudo-locking setup mode and just
-				 * parsed a valid CBM that should be
-				 * pseudo-locked. Only one locked region per
-				 * resource group and domain so just do
-				 * the required initialization for single
-				 * region and return.
-				 */
-				rdtgrp->plr->r = r;
-				rdtgrp->plr->d = d;
-				rdtgrp->plr->cbm = d->new_ctrl;
-				d->plr = rdtgrp->plr;
-				return 0;
-			}
-			goto next;
-		}
-	}
-	return -EINVAL;
-}
-
-int update_domains(struct rdt_resource *r, int closid)
+int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
 {
+	struct resctrl_staged_config *cfg;
+	struct rdt_hw_ctrl_domain *hw_dom;
 	struct msr_param msr_param;
-	cpumask_var_t cpu_mask;
-	struct rdt_domain *d;
-	bool mba_sc;
-	u32 *dc;
-	int cpu;
-
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
-		return -ENOMEM;
-
-	msr_param.low = closid;
-	msr_param.high = msr_param.low + 1;
-	msr_param.res = r;
-
-	mba_sc = is_mba_sc(r);
-	list_for_each_entry(d, &r->domains, list) {
-		dc = !mba_sc ? d->ctrl_val : d->mbps_val;
-		if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
-			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-			dc[closid] = d->new_ctrl;
+	struct rdt_ctrl_domain *d;
+	enum resctrl_conf_type t;
+	u32 idx;
+
+	/* Walking r->domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
+
+	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+		hw_dom = resctrl_to_arch_ctrl_dom(d);
+		msr_param.res = NULL;
+		for (t = 0; t < CDP_NUM_TYPES; t++) {
+			cfg = &hw_dom->d_resctrl.staged_config[t];
+			if (!cfg->have_new_ctrl)
+				continue;
+
+			idx = resctrl_get_config_index(closid, t);
+			if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
+				continue;
+			hw_dom->ctrl_val[idx] = cfg->new_ctrl;
+
+			if (!msr_param.res) {
+				msr_param.low = idx;
+				msr_param.high = msr_param.low + 1;
+				msr_param.res = r;
+				msr_param.dom = d;
+			} else {
+				msr_param.low = min(msr_param.low, idx);
+				msr_param.high = max(msr_param.high, idx + 1);
+			}
 		}
+		if (msr_param.res)
+			smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
 	}
 
-	/*
-	 * Avoid writing the control msr with control values when
-	 * MBA software controller is enabled
-	 */
-	if (cpumask_empty(cpu_mask) || mba_sc)
-		goto done;
-	cpu = get_cpu();
-	/* Update resource control msr on this CPU if it's in cpu_mask. */
-	if (cpumask_test_cpu(cpu, cpu_mask))
-		rdt_ctrl_update(&msr_param);
-	/* Update resource control msr on other CPUs. */
-	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
-	put_cpu();
-
-done:
-	free_cpumask_var(cpu_mask);
-
 	return 0;
 }
 
-static int rdtgroup_parse_resource(char *resname, char *tok,
-				   struct rdtgroup *rdtgrp)
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
+			    u32 closid, enum resctrl_conf_type type)
 {
-	struct rdt_resource *r;
+	struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
+	u32 idx = resctrl_get_config_index(closid, type);
 
-	for_each_alloc_enabled_rdt_resource(r) {
-		if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
-			return parse_line(tok, r, rdtgrp);
-	}
-	rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
-	return -EINVAL;
+	return hw_dom->ctrl_val[idx];
 }
 
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
-				char *buf, size_t nbytes, loff_t off)
+bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
 {
-	struct rdtgroup *rdtgrp;
-	struct rdt_domain *dom;
-	struct rdt_resource *r;
-	char *tok, *resname;
-	int ret = 0;
-
-	/* Valid input requires a trailing newline */
-	if (nbytes == 0 || buf[nbytes - 1] != '\n')
-		return -EINVAL;
-	buf[nbytes - 1] = '\0';
-
-	cpus_read_lock();
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		rdtgroup_kn_unlock(of->kn);
-		cpus_read_unlock();
-		return -ENOENT;
-	}
-	rdt_last_cmd_clear();
-
-	/*
-	 * No changes to pseudo-locked region allowed. It has to be removed
-	 * and re-created instead.
-	 */
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-		ret = -EINVAL;
-		rdt_last_cmd_puts("Resource group is pseudo-locked\n");
-		goto out;
-	}
-
-	for_each_alloc_enabled_rdt_resource(r) {
-		list_for_each_entry(dom, &r->domains, list)
-			dom->have_new_ctrl = false;
-	}
-
-	while ((tok = strsep(&buf, "\n")) != NULL) {
-		resname = strim(strsep(&tok, ":"));
-		if (!tok) {
-			rdt_last_cmd_puts("Missing ':'\n");
-			ret = -EINVAL;
-			goto out;
-		}
-		if (tok[0] == '\0') {
-			rdt_last_cmd_printf("Missing '%s' value\n", resname);
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
-		if (ret)
-			goto out;
-	}
-
-	for_each_alloc_enabled_rdt_resource(r) {
-		ret = update_domains(r, rdtgrp->closid);
-		if (ret)
-			goto out;
-	}
-
-	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-		/*
-		 * If pseudo-locking fails we keep the resource group in
-		 * mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
-		 * active and updated for just the domain the pseudo-locked
-		 * region was requested for.
-		 */
-		ret = rdtgroup_pseudo_lock_create(rdtgrp);
-	}
-
-out:
-	rdtgroup_kn_unlock(of->kn);
-	cpus_read_unlock();
-	return ret ?: nbytes;
+	return resctrl_to_arch_res(r)->sdciae_enabled;
 }
 
-static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
+static void resctrl_sdciae_set_one_amd(void *arg)
 {
-	struct rdt_domain *dom;
-	bool sep = false;
-	u32 ctrl_val;
-
-	seq_printf(s, "%*s:", max_name_width, r->name);
-	list_for_each_entry(dom, &r->domains, list) {
-		if (sep)
-			seq_puts(s, ";");
+	bool *enable = arg;
 
-		ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
-			    dom->mbps_val[closid]);
-		seq_printf(s, r->format_str, dom->id, max_data_width,
-			   ctrl_val);
-		sep = true;
-	}
-	seq_puts(s, "\n");
+	if (*enable)
+		msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
+	else
+		msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT);
 }
 
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
-			   struct seq_file *s, void *v)
+static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable)
 {
-	struct rdtgroup *rdtgrp;
-	struct rdt_resource *r;
-	int ret = 0;
-	u32 closid;
-
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (rdtgrp) {
-		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
-			for_each_alloc_enabled_rdt_resource(r)
-				seq_printf(s, "%s:uninitialized\n", r->name);
-		} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
-			if (!rdtgrp->plr->d) {
-				rdt_last_cmd_clear();
-				rdt_last_cmd_puts("Cache domain offline\n");
-				ret = -ENODEV;
-			} else {
-				seq_printf(s, "%s:%d=%x\n",
-					   rdtgrp->plr->r->name,
-					   rdtgrp->plr->d->id,
-					   rdtgrp->plr->cbm);
-			}
-		} else {
-			closid = rdtgrp->closid;
-			for_each_alloc_enabled_rdt_resource(r) {
-				if (closid < r->num_closid)
-					show_doms(s, r, closid);
-			}
-		}
-	} else {
-		ret = -ENOENT;
-	}
-	rdtgroup_kn_unlock(of->kn);
-	return ret;
-}
+	struct rdt_ctrl_domain *d;
 
-void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
-		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
-		    int evtid, int first)
-{
-	/*
-	 * setup the parameters to send to the IPI to read the data.
-	 */
-	rr->rgrp = rdtgrp;
-	rr->evtid = evtid;
-	rr->r = r;
-	rr->d = d;
-	rr->val = 0;
-	rr->first = first;
+	/* Walking r->ctrl_domains, ensure it can't race with cpuhp */
+	lockdep_assert_cpus_held();
 
-	smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+	/* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */
+	list_for_each_entry(d, &r->ctrl_domains, hdr.list)
+		on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1);
 }
 
-int rdtgroup_mondata_show(struct seq_file *m, void *arg)
+int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
 {
-	struct kernfs_open_file *of = m->private;
-	u32 resid, evtid, domid;
-	struct rdtgroup *rdtgrp;
-	struct rdt_resource *r;
-	union mon_data_bits md;
-	struct rdt_domain *d;
-	struct rmid_read rr;
-	int ret = 0;
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
 
-	rdtgrp = rdtgroup_kn_lock_live(of->kn);
-	if (!rdtgrp) {
-		ret = -ENOENT;
-		goto out;
+	if (hw_res->r_resctrl.cache.io_alloc_capable &&
+	    hw_res->sdciae_enabled != enable) {
+		_resctrl_sdciae_enable(r, enable);
+		hw_res->sdciae_enabled = enable;
 	}
 
-	md.priv = of->kn->priv;
-	resid = md.u.rid;
-	domid = md.u.domid;
-	evtid = md.u.evtid;
-
-	r = &rdt_resources_all[resid];
-	d = rdt_find_domain(r, domid, NULL);
-	if (IS_ERR_OR_NULL(d)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	mon_event_read(&rr, r, d, rdtgrp, evtid, false);
-
-	if (rr.val & RMID_VAL_ERROR)
-		seq_puts(m, "Error\n");
-	else if (rr.val & RMID_VAL_UNAVAIL)
-		seq_puts(m, "Unavailable\n");
-	else
-		seq_printf(m, "%llu\n", rr.val * r->mon_scale);
-
-out:
-	rdtgroup_kn_unlock(of->kn);
-	return ret;
+	return 0;
 }
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 6a5f60a37219..4a916c84a322 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -2,45 +2,22 @@
 #ifndef _ASM_X86_RESCTRL_INTERNAL_H
 #define _ASM_X86_RESCTRL_INTERNAL_H
 
-#include <linux/sched.h>
-#include <linux/kernfs.h>
-#include <linux/fs_context.h>
-#include <linux/jump_label.h>
-
-#define MSR_IA32_L3_QOS_CFG		0xc81
-#define MSR_IA32_L2_QOS_CFG		0xc82
-#define MSR_IA32_L3_CBM_BASE		0xc90
-#define MSR_IA32_L2_CBM_BASE		0xd10
-#define MSR_IA32_MBA_THRTL_BASE		0xd50
-#define MSR_IA32_MBA_BW_BASE		0xc0000200
-
-#define MSR_IA32_QM_CTR			0x0c8e
-#define MSR_IA32_QM_EVTSEL		0x0c8d
+#include <linux/resctrl.h>
 
 #define L3_QOS_CDP_ENABLE		0x01ULL
 
 #define L2_QOS_CDP_ENABLE		0x01ULL
 
-/*
- * Event IDs are used to program IA32_QM_EVTSEL before reading event
- * counter from IA32_QM_CTR
- */
-#define QOS_L3_OCCUP_EVENT_ID		0x01
-#define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
-#define QOS_L3_MBM_LOCAL_EVENT_ID	0x03
-
-#define CQM_LIMBOCHECK_INTERVAL		1000
-
 #define MBM_CNTR_WIDTH_BASE		24
-#define MBM_OVERFLOW_INTERVAL		1000
-#define MAX_MBA_BW			100u
+
 #define MBA_IS_LINEAR			0x4
-#define MBA_MAX_MBPS			U32_MAX
-#define MAX_MBA_BW_AMD			0x800
+
 #define MBM_CNTR_WIDTH_OFFSET_AMD	20
 
 #define RMID_VAL_ERROR			BIT_ULL(63)
+
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
+
 /*
  * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
  * data to be returned. The counter width is discovered from the hardware
@@ -48,510 +25,122 @@
  */
 #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
 
-
-struct rdt_fs_context {
-	struct kernfs_fs_context	kfc;
-	bool				enable_cdpl2;
-	bool				enable_cdpl3;
-	bool				enable_mba_mbps;
-};
-
-static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
-{
-	struct kernfs_fs_context *kfc = fc->fs_private;
-
-	return container_of(kfc, struct rdt_fs_context, kfc);
-}
-
-DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
-DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
-
-/**
- * struct mon_evt - Entry in the event list of a resource
- * @evtid:	event id
- * @name:	name of the event
- * @list:	entry in &rdt_resource->evt_list
- */
-struct mon_evt {
-	u32			evtid;
-	char			*name;
-	struct list_head	list;
-};
-
 /**
- * union mon_data_bits - Monitoring details for each event file
- * @priv:	Used to store monitoring event data in @u
- *		as kernfs private data
- * @rid:	Resource id associated with the event file
- * @evtid:	Event id associated with the event file
- * @domid:	The domain to which the event file belongs
- * @u:		Name of the bit fields struct
+ * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s
+ *			   return value.
+ * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
+ * @prev_msr:	Value of IA32_QM_CTR last time it was read for the RMID used to
+ *		find this struct.
*/ -union mon_data_bits { - void *priv; - struct { - unsigned int rid : 10; - unsigned int evtid : 8; - unsigned int domid : 14; - } u; -}; - -struct rmid_read { - struct rdtgroup *rgrp; - struct rdt_resource *r; - struct rdt_domain *d; - int evtid; - bool first; - u64 val; -}; - -extern unsigned int resctrl_cqm_threshold; -extern bool rdt_alloc_capable; -extern bool rdt_mon_capable; -extern unsigned int rdt_mon_features; - -enum rdt_group_type { - RDTCTRL_GROUP = 0, - RDTMON_GROUP, - RDT_NUM_GROUP, +struct arch_mbm_state { + u64 chunks; + u64 prev_msr; }; -/** - * enum rdtgrp_mode - Mode of a RDT resource group - * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations - * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed - * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking - * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations - * allowed AND the allocations are Cache Pseudo-Locked - * @RDT_NUM_MODES: Total number of modes - * - * The mode of a resource group enables control over the allowed overlap - * between allocations associated with different resource groups (classes - * of service). User is able to modify the mode of a resource group by - * writing to the "mode" resctrl file associated with the resource group. - * - * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by - * writing the appropriate text to the "mode" file. A resource group enters - * "pseudo-locked" mode after the schemata is written while the resource - * group is in "pseudo-locksetup" mode. - */ -enum rdtgrp_mode { - RDT_MODE_SHAREABLE = 0, - RDT_MODE_EXCLUSIVE, - RDT_MODE_PSEUDO_LOCKSETUP, - RDT_MODE_PSEUDO_LOCKED, - - /* Must be last */ - RDT_NUM_MODES, -}; +/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */ +#define ABMC_ENABLE_BIT 0 -/** - * struct mongroup - store mon group's data in resctrl fs. - * @mon_data_kn: kernfs node for the mon_data directory - * @parent: parent rdtgrp - * @crdtgrp_list: child rdtgroup node list - * @rmid: rmid for this rdtgroup +/* + * Qos Event Identifiers. 
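+ *
+ * The two identifiers below are combined when reading an assigned
+ * ABMC counter: __cntr_id_read() writes ABMC_EXTENDED_EVT_ID |
+ * ABMC_EVT_ID (0x80000001) into the low half of MSR_IA32_QM_EVTSEL
+ * and the counter ID into the high half.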
*/ -struct mongroup { - struct kernfs_node *mon_data_kn; - struct rdtgroup *parent; - struct list_head crdtgrp_list; - u32 rmid; -}; +#define ABMC_EXTENDED_EVT_ID BIT(31) +#define ABMC_EVT_ID BIT(0) -/** - * struct pseudo_lock_region - pseudo-lock region information - * @r: RDT resource to which this pseudo-locked region - * belongs - * @d: RDT domain to which this pseudo-locked region - * belongs - * @cbm: bitmask of the pseudo-locked region - * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread - * completion - * @thread_done: variable used by waitqueue to test if pseudo-locking - * thread completed - * @cpu: core associated with the cache on which the setup code - * will be run - * @line_size: size of the cache lines - * @size: size of pseudo-locked region in bytes - * @kmem: the kernel memory associated with pseudo-locked region - * @minor: minor number of character device associated with this - * region - * @debugfs_dir: pointer to this region's directory in the debugfs - * filesystem - * @pm_reqs: Power management QoS requests related to this region - */ -struct pseudo_lock_region { - struct rdt_resource *r; - struct rdt_domain *d; - u32 cbm; - wait_queue_head_t lock_thread_wq; - int thread_done; - int cpu; - unsigned int line_size; - unsigned int size; - void *kmem; - unsigned int minor; - struct dentry *debugfs_dir; - struct list_head pm_reqs; -}; +/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */ +#define SDCIAE_ENABLE_BIT 1 /** - * struct rdtgroup - store rdtgroup's data in resctrl file system. - * @kn: kernfs node - * @rdtgroup_list: linked list for all rdtgroups - * @closid: closid for this rdtgroup - * @cpu_mask: CPUs assigned to this rdtgroup - * @flags: status bits - * @waitcount: how many cpus expect to find this - * group when they acquire rdtgroup_mutex - * @type: indicates type of this rdtgroup - either - * monitor only or ctrl_mon group - * @mon: mongroup related data - * @mode: mode of resource group - * @plr: pseudo-locked region + * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share + * a resource for a control function + * @d_resctrl: Properties exposed to the resctrl file system + * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * + * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdtgroup { - struct kernfs_node *kn; - struct list_head rdtgroup_list; - u32 closid; - struct cpumask cpu_mask; - int flags; - atomic_t waitcount; - enum rdt_group_type type; - struct mongroup mon; - enum rdtgrp_mode mode; - struct pseudo_lock_region *plr; +struct rdt_hw_ctrl_domain { + struct rdt_ctrl_domain d_resctrl; + u32 *ctrl_val; }; -/* rdtgroup.flags */ -#define RDT_DELETED 1 - -/* rftype.flags */ -#define RFTYPE_FLAGS_CPUS_LIST 1 - -/* - * Define the file type flags for base and info directories. 
- */ -#define RFTYPE_INFO BIT(0) -#define RFTYPE_BASE BIT(1) -#define RF_CTRLSHIFT 4 -#define RF_MONSHIFT 5 -#define RF_TOPSHIFT 6 -#define RFTYPE_CTRL BIT(RF_CTRLSHIFT) -#define RFTYPE_MON BIT(RF_MONSHIFT) -#define RFTYPE_TOP BIT(RF_TOPSHIFT) -#define RFTYPE_RES_CACHE BIT(8) -#define RFTYPE_RES_MB BIT(9) -#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) -#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) -#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) -#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) - -/* List of all resource groups */ -extern struct list_head rdt_all_groups; - -extern int max_name_width, max_data_width; - -int __init rdtgroup_init(void); -void __exit rdtgroup_exit(void); - /** - * struct rftype - describe each file in the resctrl file system - * @name: File name - * @mode: Access mode - * @kf_ops: File operations - * @flags: File specific RFTYPE_FLAGS_* flags - * @fflags: File specific RF_* or RFTYPE_* flags - * @seq_show: Show content of the file - * @write: Write to the file + * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share + * a resource for a monitor function + * @d_resctrl: Properties exposed to the resctrl file system + * @arch_mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct arch_mbm_state + * indexed by RMID on x86. + * + * Members of this structure are accessed via helpers that provide abstraction. */ -struct rftype { - char *name; - umode_t mode; - const struct kernfs_ops *kf_ops; - unsigned long flags; - unsigned long fflags; - - int (*seq_show)(struct kernfs_open_file *of, - struct seq_file *sf, void *v); - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. 
- */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); +struct rdt_hw_mon_domain { + struct rdt_mon_domain d_resctrl; + struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; }; -/** - * struct mbm_state - status for each MBM counter in each domain - * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) - * @prev_msr: Value of IA32_QM_CTR for this RMID last time we read it - * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting - * @prev_bw: The most recent bandwidth in MBps - * @delta_bw: Difference between the current and previous bandwidth - * @delta_comp: Indicates whether to compute the delta_bw - */ -struct mbm_state { - u64 chunks; - u64 prev_msr; - u64 prev_bw_msr; - u32 prev_bw; - u32 delta_bw; - bool delta_comp; -}; +static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) +{ + return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); +} -/** - * struct rdt_domain - group of cpus sharing an RDT resource - * @list: all instances of this resource - * @id: unique id for this instance - * @cpu_mask: which cpus share this resource - * @rmid_busy_llc: - * bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth - * @mbm_over: worker to periodically read MBM h/w counters - * @cqm_limbo: worker to periodically read CQM h/w counters - * @mbm_work_cpu: - * worker cpu for MBM h/w counters - * @cqm_work_cpu: - * worker cpu for CQM h/w counters - * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) - * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps - * @new_ctrl: new ctrl value to be loaded - * @have_new_ctrl: did user provide new_ctrl for this domain - * @plr: pseudo-locked region (if any) associated with domain - */ -struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; - unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; - struct delayed_work mbm_over; - struct delayed_work cqm_limbo; - int mbm_work_cpu; - int cqm_work_cpu; - u32 *ctrl_val; - u32 *mbps_val; - u32 new_ctrl; - bool have_new_ctrl; - struct pseudo_lock_region *plr; -}; +static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) +{ + return container_of(r, struct rdt_hw_mon_domain, d_resctrl); +} /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use + * @dom: The domain to update * @low: Beginning index from base MSR * @high: End index */ struct msr_param { struct rdt_resource *res; - int low; - int high; -}; - -/** - * struct rdt_cache - Cache allocation related data - * @cbm_len: Length of the cache bit mask - * @min_cbm_bits: Minimum number of consecutive bits to be set - * @cbm_idx_mult: Multiplier of CBM index - * @cbm_idx_offset: Offset of CBM index. CBM index is computed by: - * closid * cbm_idx_multi + cbm_idx_offset - * in a cache bit mask - * @shareable_bits: Bitmask of shareable resource with other - * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. - * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. - * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache - * level has CPU scope. 
- */ -struct rdt_cache { - unsigned int cbm_len; - unsigned int min_cbm_bits; - unsigned int cbm_idx_mult; - unsigned int cbm_idx_offset; - unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; - bool arch_has_empty_bitmaps; - bool arch_has_per_cpu_cfg; + struct rdt_ctrl_domain *dom; + u32 low; + u32 high; }; /** - * enum membw_throttle_mode - System's memory bandwidth throttling mode - * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system - * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core - * always using smallest bandwidth percentage - * assigned to threads, aka "max throttling" - * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread - */ -enum membw_throttle_mode { - THREAD_THROTTLE_UNDEFINED = 0, - THREAD_THROTTLE_MAX, - THREAD_THROTTLE_PER_THREAD, -}; - -/** - * struct rdt_membw - Memory bandwidth allocation related data - * @min_bw: Minimum memory bandwidth percentage user can request - * @bw_gran: Granularity at which the memory bandwidth is allocated - * @delay_linear: True if memory B/W delay is in linear scale - * @arch_needs_linear: True if we can't configure non-linear resources - * @throttle_mode: Bandwidth throttling mode when threads request - * different memory bandwidths - * @mba_sc: True if MBA software controller(mba_sc) is enabled - * @mb_map: Mapping of memory B/W percentage to memory B/W delay - */ -struct rdt_membw { - u32 min_bw; - u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - enum membw_throttle_mode throttle_mode; - bool mba_sc; - u32 *mb_map; -}; - -static inline bool is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - -static inline bool is_mbm_enabled(void) -{ - return (is_mbm_total_enabled() || is_mbm_local_enabled()); -} - -static inline bool is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - -struct rdt_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - -/** - * struct rdt_resource - attributes of an RDT resource - * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature - * @alloc_capable: Is allocation available on this machine - * @mon_capable: Is monitor feature available on this machine - * @name: Name to use in "schemata" file - * @num_closid: Number of CLOSIDs available - * @cache_level: Which cache level defines scope of this resource - * @default_ctrl: Specifies default cache cbm or memory B/W percent. + * struct rdt_hw_resource - arch private attributes of a resctrl resource + * @r_resctrl: Attributes of the resource used directly by resctrl. + * @num_closid: Maximum number of closid this hardware can support, + * regardless of CDP. This is exposed via + * resctrl_arch_get_num_closid() to avoid confusion + * with struct resctrl_schema's property of the same name, + * which has been corrected for features like CDP. * @msr_base: Base MSR address for CBMs * @msr_update: Function pointer to update QOS MSRs - * @data_width: Character width of data when displaying - * @domains: All domains for this resource - * @cache: Cache allocation related data - * @membw: If the component has bandwidth controls, their properties. 
- * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values - * @evt_list: List of monitoring events - * @num_rmid: Number of RMIDs available * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. - * @fflags: flags to choose base and info files + * @cdp_enabled: CDP state of this resource + * @mbm_cntr_assign_enabled: ABMC feature is enabled + * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled. + * + * Members of this structure are either private to the architecture + * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. + * msr_update and msr_base. */ -struct rdt_resource { - int rid; - bool alloc_enabled; - bool mon_enabled; - bool alloc_capable; - bool mon_capable; - char *name; - int num_closid; - int cache_level; - u32 default_ctrl; +struct rdt_hw_resource { + struct rdt_resource r_resctrl; + u32 num_closid; unsigned int msr_base; - void (*msr_update) (struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); - int data_width; - struct list_head domains; - struct rdt_cache cache; - struct rdt_membw membw; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct rdt_resource *r, - struct rdt_domain *d); - struct list_head evt_list; - int num_rmid; + void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; - unsigned long fflags; + bool cdp_enabled; + bool mbm_cntr_assign_enabled; + bool sdciae_enabled; }; -int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d); -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d); - -extern struct mutex rdtgroup_mutex; - -extern struct rdt_resource rdt_resources_all[]; -extern struct rdtgroup rdtgroup_default; -DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); - -extern struct dentry *debugfs_resctrl; - -enum { - RDT_RESOURCE_L3, - RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE, - RDT_RESOURCE_L2, - RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE, - RDT_RESOURCE_MBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - -#define for_each_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) - -#define for_each_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ - if (r->alloc_capable || r->mon_capable) - -#define for_each_alloc_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ - if (r->alloc_capable) - -#define for_each_mon_capable_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ - if (r->mon_capable) +static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) +{ + return container_of(r, struct rdt_hw_resource, r_resctrl); +} -#define for_each_alloc_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ - if (r->alloc_enabled) +extern struct rdt_hw_resource rdt_resources_all[]; -#define for_each_mon_enabled_rdt_resource(r) \ - for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ - r++) \ - if (r->mon_enabled) +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { @@ -569,6 +158,15 @@ union cpuid_0x10_3_eax { unsigned int full; }; +/* 
CPUID.(EAX=10H, ECX=ResID).ECX */ +union cpuid_0x10_x_ecx { + struct { + unsigned int reserved:3; + unsigned int noncont:1; + } split; + unsigned int full; +}; + /* CPUID.(EAX=10H, ECX=ResID).EDX */ union cpuid_0x10_x_edx { struct { @@ -577,65 +175,51 @@ union cpuid_0x10_x_edx { unsigned int full; }; -void rdt_last_cmd_clear(void); -void rdt_last_cmd_puts(const char *s); -__printf(1, 2) -void rdt_last_cmd_printf(const char *fmt, ...); +/* + * ABMC counters are configured by writing to MSR_IA32_L3_QOS_ABMC_CFG. + * + * @bw_type : Event configuration that represents the memory + * transactions being tracked by the @cntr_id. + * @bw_src : Bandwidth source (RMID or CLOSID). + * @reserved1 : Reserved. + * @is_clos : @bw_src field is a CLOSID (not an RMID). + * @cntr_id : Counter identifier. + * @reserved : Reserved. + * @cntr_en : Counting enable bit. + * @cfg_en : Configuration enable bit. + * + * Configuration and counting: + * Counter can be configured across multiple writes to MSR. Configuration + * is applied only when @cfg_en = 1. Counter @cntr_id is reset when the + * configuration is applied. + * @cfg_en = 1, @cntr_en = 0 : Apply @cntr_id configuration but do not + * count events. + * @cfg_en = 1, @cntr_en = 1 : Apply @cntr_id configuration and start + * counting events. + */ +union l3_qos_abmc_cfg { + struct { + unsigned long bw_type :32, + bw_src :12, + reserved1: 3, + is_clos : 1, + cntr_id : 5, + reserved : 9, + cntr_en : 1, + cfg_en : 1; + } split; + unsigned long full; +}; void rdt_ctrl_update(void *arg); -struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); -void rdtgroup_kn_unlock(struct kernfs_node *kn); -int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name); -int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, - umode_t mask); -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos); -ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); -int rdtgroup_schemata_show(struct kernfs_open_file *of, - struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive); -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm); -enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); -int rdtgroup_tasks_assigned(struct rdtgroup *r); -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); -int rdt_pseudo_lock_init(void); -void rdt_pseudo_lock_release(void); -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); -struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); -int update_domains(struct rdt_resource *r, int closid); -int closids_supported(void); -void closid_free(int closid); -int alloc_rmid(void); -void free_rmid(u32 rmid); + int rdt_get_mon_l3_config(struct rdt_resource *r); -void mon_event_count(void *info); -int rdtgroup_mondata_show(struct seq_file *m, void *arg); -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id); -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d); -void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, 
- int evtid, int first); -void mbm_setup_overflow_handler(struct rdt_domain *dom, - unsigned long delay_ms); -void mbm_handle_overflow(struct work_struct *work); + +bool rdt_cpu_has(int flag); + void __init intel_rdt_mbm_apply_quirk(void); -bool is_mba_sc(struct rdt_resource *r); -void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); -void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); -void __check_limbo(struct rdt_domain *d, bool force_free); + void rdt_domain_reconfigure_cdp(struct rdt_resource *r); -void __init thread_throttle_mode_init(void); +void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f07c10b87a87..dffcc8307500 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -15,37 +15,15 @@ * Software Developer Manual June 2016, volume 3, section 17.17. */ -#include <linux/module.h> -#include <linux/slab.h> -#include <asm/cpu_device_id.h> -#include "internal.h" +#define pr_fmt(fmt) "resctrl: " fmt -struct rmid_entry { - u32 rmid; - int busy; - struct list_head list; -}; +#include <linux/cpu.h> +#include <linux/resctrl.h> -/** - * @rmid_free_lru A least recently used list of free RMIDs - * These RMIDs are guaranteed to have an occupancy less than the - * threshold occupancy - */ -static LIST_HEAD(rmid_free_lru); - -/** - * @rmid_limbo_count count of currently unused but (potentially) - * dirty RMIDs. - * This counts RMIDs that no one is currently using but that - * may have a occupancy value > intel_cqm_threshold. User can change - * the threshold occupancy value. - */ -static unsigned int rmid_limbo_count; +#include <asm/cpu_device_id.h> +#include <asm/msr.h> -/** - * @rmid_entry - The entry in the limbo and free lists. - */ -static struct rmid_entry *rmid_ptrs; +#include "internal.h" /* * Global boolean for rdt_monitor which is true if any @@ -53,21 +31,12 @@ static struct rmid_entry *rmid_ptrs; */ bool rdt_mon_capable; -/* - * Global to indicate which monitoring events are enabled. - */ -unsigned int rdt_mon_features; - -/* - * This is the threshold cache occupancy at which we will consider an - * RMID available for re-allocation. - */ -unsigned int resctrl_cqm_threshold; - #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) +static int snc_nodes_per_l3_cache = 1; + /* - * The correction factor table is documented in Documentation/x86/resctrl.rst. + * The correction factor table is documented in Documentation/filesystems/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied * by the correction factor. * @@ -116,6 +85,7 @@ static const struct mbm_correction_factor_table { }; static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX; + static u64 mbm_cf __read_mostly; static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) @@ -127,19 +97,45 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) return val; } -static inline struct rmid_entry *__rmid_entry(u32 rmid) +/* + * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by + * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is + * needed. The physical RMID is the same as the logical RMID. 
+ * + * On a platform with SNC mode enabled, Linux enables RMID sharing mode + * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel + * Resource Director Technology Architecture Specification" for a full + * description of RMID sharing mode). + * + * In RMID sharing mode there are fewer "logical RMID" values available + * to accumulate data ("physical RMIDs" are divided evenly between SNC + * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * each SNC node. + * + * The value loaded into IA32_PQR_ASSOC is the "logical RMID". + * + * Data is collected independently on each SNC node and can be retrieved + * using the "physical RMID" value computed by this function and loaded + * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node. + * + * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3 + * cache. So a "physical RMID" may be read from any CPU that shares + * the L3 cache with the desired SNC node, not just from a CPU in + * the specific SNC node. + */ +static int logical_rmid_to_physical_rmid(int cpu, int lrmid) { - struct rmid_entry *entry; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - entry = &rmid_ptrs[rmid]; - WARN_ON(entry->rmid != rmid); + if (snc_nodes_per_l3_cache == 1) + return lrmid; - return entry; + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid; } -static u64 __rmid_read(u32 rmid, u32 eventid) +static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) { - u64 val; + u64 msr_val; /* * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured @@ -149,574 +145,439 @@ static u64 __rmid_read(u32 rmid, u32 eventid) * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) * are error bits. */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, val); - - return val; -} + wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid); + rdmsrq(MSR_IA32_QM_CTR, msr_val); -static bool rmid_dirty(struct rmid_entry *entry) -{ - u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; - return val >= resctrl_cqm_threshold; + *val = msr_val; + return 0; } -/* - * Check the RMIDs that are marked as busy for this domain. If the - * reported LLC occupancy is below the threshold clear the busy bit and - * decrement the count. If the busy count gets to zero on an RMID, we - * free the RMID - */ -void __check_limbo(struct rdt_domain *d, bool force_free) +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, + u32 rmid, + enum resctrl_event_id eventid) { - struct rmid_entry *entry; - struct rdt_resource *r; - u32 crmid = 1, nrmid; + struct arch_mbm_state *state; - r = &rdt_resources_all[RDT_RESOURCE_L3]; + if (!resctrl_is_mbm_event(eventid)) + return NULL; - /* - * Skip RMID 0 and start from RMID 1 and check all the RMIDs that - * are marked as busy for occupancy < threshold. If the occupancy - * is less than the threshold decrement the busy counter of the - * RMID and move it to the free list when the counter reaches 0. 
- */ - for (;;) { - nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid); - if (nrmid >= r->num_rmid) - break; - - entry = __rmid_entry(nrmid); - if (force_free || !rmid_dirty(entry)) { - clear_bit(entry->rmid, d->rmid_busy_llc); - if (!--entry->busy) { - rmid_limbo_count--; - list_add_tail(&entry->list, &rmid_free_lru); - } - } - crmid = nrmid + 1; - } + state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)]; + + return state ? &state[rmid] : NULL; } -bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, + enum resctrl_event_id eventid) { - return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; + u32 prmid; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + memset(am, 0, sizeof(*am)); + + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + /* Record any initial, non-zero count value. */ + __rmid_read_phys(prmid, eventid, &am->prev_msr); + } } /* - * As of now the RMIDs allocation is global. - * However we keep track of which packages the RMIDs - * are used to optimize the limbo list management. + * Assumes that hardware counters are also reset and thus that there is + * no need to record initial non-zero counts. */ -int alloc_rmid(void) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { - struct rmid_entry *entry; - - lockdep_assert_held(&rdtgroup_mutex); - - if (list_empty(&rmid_free_lru)) - return rmid_limbo_count ? -EBUSY : -ENOSPC; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + enum resctrl_event_id eventid; + int idx; + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + memset(hw_dom->arch_mbm_states[idx], 0, + sizeof(*hw_dom->arch_mbm_states[0]) * r->mon.num_rmid); + } +} - entry = list_first_entry(&rmid_free_lru, - struct rmid_entry, list); - list_del(&entry->list); +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) +{ + u64 shift = 64 - width, chunks; - return entry->rmid; + chunks = (cur_msr << shift) - (prev_msr << shift); + return chunks >> shift; } -static void add_rmid_to_limbo(struct rmid_entry *entry) +static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 msr_val) { - struct rdt_resource *r; - struct rdt_domain *d; - int cpu; - u64 val; - - r = &rdt_resources_all[RDT_RESOURCE_L3]; - - entry->busy = 0; - cpu = get_cpu(); - list_for_each_entry(d, &r->domains, list) { - if (cpumask_test_cpu(cpu, &d->cpu_mask)) { - val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); - if (val <= resctrl_cqm_threshold) - continue; - } - - /* - * For the first limbo RMID in the domain, - * setup up the limbo worker. 
- */ - if (!has_busy_rmid(r, d)) - cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL); - set_bit(entry->rmid, d->rmid_busy_llc); - entry->busy++; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct arch_mbm_state *am; + u64 chunks; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + am->chunks += mbm_overflow_count(am->prev_msr, msr_val, + hw_res->mbm_width); + chunks = get_corrected_mbm_count(rmid, am->chunks); + am->prev_msr = msr_val; + } else { + chunks = msr_val; } - put_cpu(); - if (entry->busy) - rmid_limbo_count++; - else - list_add_tail(&entry->list, &rmid_free_lru); + return chunks * hw_res->mon_scale; } -void free_rmid(u32 rmid) +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *ignored) { - struct rmid_entry *entry; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; + u64 msr_val; + u32 prmid; + int ret; - if (!rmid) - return; + resctrl_arch_rmid_read_context_check(); - lockdep_assert_held(&rdtgroup_mutex); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); - entry = __rmid_entry(rmid); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - if (is_llc_occupancy_enabled()) - add_rmid_to_limbo(entry); - else - list_add_tail(&entry->list, &rmid_free_lru); + return ret; } -static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) +static int __cntr_id_read(u32 cntr_id, u64 *val) { - u64 shift = 64 - width, chunks; + u64 msr_val; - chunks = (cur_msr << shift) - (prev_msr << shift); - return chunks >>= shift; + /* + * QM_EVTSEL Register definition: + * ======================================================= + * Bits Mnemonic Description + * ======================================================= + * 63:44 -- Reserved + * 43:32 RMID RMID or counter ID in ABMC mode + * when reading an MBM event + * 31 ExtendedEvtID Extended Event Identifier + * 30:8 -- Reserved + * 7:0 EvtID Event Identifier + * ======================================================= + * The contents of a specific counter can be read by setting the + * following fields in QM_EVTSEL.ExtendedEvtID(=1) and + * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting QM_EVTSEL.RMID + * to the desired counter ID. Reading the QM_CTR then returns the + * contents of the specified counter. The RMID_VAL_ERROR bit is set + * if the counter configuration is invalid, or if an invalid counter + * ID is set in the QM_EVTSEL.RMID field. The RMID_VAL_UNAVAIL bit + * is set if the counter data is unavailable. 
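+ *
+ * A worked example with an illustrative counter ID: reading counter 3
+ * writes QM_EVTSEL = (3ULL << 32) | BIT(31) | BIT(0), i.e. the
+ * counter ID in bits 43:32, ExtendedEvtID = 1 and
+ * EvtID = L3CacheABMC (= 1).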
+ */ + wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) { - struct mbm_state *m; - u64 chunks, tval; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct arch_mbm_state *am; - tval = __rmid_read(rmid, rr->evtid); - if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; - } - switch (rr->evtid) { - case QOS_L3_OCCUP_EVENT_ID: - rr->val += tval; - return 0; - case QOS_L3_MBM_TOTAL_EVENT_ID: - m = &rr->d->mbm_total[rmid]; - break; - case QOS_L3_MBM_LOCAL_EVENT_ID: - m = &rr->d->mbm_local[rmid]; - break; - default: - /* - * Code would never reach here because - * an invalid event id would fail the __rmid_read. - */ - return -EINVAL; - } + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + memset(am, 0, sizeof(*am)); - if (rr->first) { - memset(m, 0, sizeof(struct mbm_state)); - m->prev_bw_msr = m->prev_msr = tval; - return 0; + /* Record any initial, non-zero count value. */ + __cntr_id_read(cntr_id, &am->prev_msr); } +} - chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); - m->chunks += chunks; - m->prev_msr = tval; +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + int ret; - rr->val += get_corrected_mbm_count(rmid, m->chunks); + ret = __cntr_id_read(cntr_id, &msr_val); + if (ret) + return ret; + + *val = get_corrected_val(r, d, rmid, eventid, msr_val); return 0; } /* - * Supporting function to calculate the memory bandwidth - * and delta bandwidth in MBps. + * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1 + * which indicates that RMIDs are configured in legacy mode. + * This mode is incompatible with Linux resctrl semantics + * as RMIDs are partitioned between SNC nodes, which requires + * a user to know which RMID is allocated to a task. + * Clearing bit 0 reconfigures the RMID counters for use + * in RMID sharing mode. This mode is better for Linux. + * The RMID space is divided between all SNC nodes with the + * RMIDs renumbered to start from zero in each node when + * counting operations from tasks. Code to read the counters + * must adjust RMID counter numbers based on SNC node. See + * logical_rmid_to_physical_rmid() for code that does this. 
*/ -static void mbm_bw_count(u32 rmid, struct rmid_read *rr) +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; - struct mbm_state *m = &rr->d->mbm_local[rmid]; - u64 tval, cur_bw, chunks; - - tval = __rmid_read(rmid, rr->evtid); - if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) - return; - - chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20; - - if (m->delta_comp) - m->delta_bw = abs(cur_bw - m->prev_bw); - m->delta_comp = false; - m->prev_bw = cur_bw; - m->prev_bw_msr = tval; + if (snc_nodes_per_l3_cache > 1) + msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); } +/* CPU models that support MSR_RMID_SNC_CONFIG */ +static const struct x86_cpu_id snc_cpu_ids[] __initconst = { + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), + {} +}; + /* - * This is called via IPI to read the CQM/MBM counters - * on a domain. + * There isn't a simple hardware bit that indicates whether a CPU is running + * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the + * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in + * the same NUMA node as CPU0. + * It is not possible to accurately determine SNC state if the system is + * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes + * to L3 caches. It will be OK if system is booted with hyperthreading + * disabled (since this doesn't affect the ratio). */ -void mon_event_count(void *info) +static __init int snc_get_config(void) { - struct rdtgroup *rdtgrp, *entry; - struct rmid_read *rr = info; - struct list_head *head; + struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); + const cpumask_t *node0_cpumask; + int cpus_per_node, cpus_per_l3; + int ret; - rdtgrp = rr->rgrp; + if (!x86_match_cpu(snc_cpu_ids) || !ci) + return 1; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + cpus_read_lock(); + if (num_online_cpus() != num_present_cpus()) + pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); + cpus_read_unlock(); - /* - * For Ctrl groups read data from child monitor groups. - */ - head = &rdtgrp->mon.crdtgrp_list; + node0_cpumask = cpumask_of_node(cpu_to_node(0)); + + cpus_per_node = cpumask_weight(node0_cpumask); + cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); + + if (!cpus_per_node || !cpus_per_l3) + return 1; - if (rdtgrp->type == RDTCTRL_GROUP) { - list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; - } + ret = cpus_per_l3 / cpus_per_node; + + /* sanity check: Only valid results are 1, 2, 3, 4, 6 */ + switch (ret) { + case 1: + break; + case 2 ... 
4: + case 6: + pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret); + rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE; + break; + default: + pr_warn("Ignore improbable SNC node count %d\n", ret); + ret = 1; + break; } + + return ret; } -/* - * Feedback loop for MBA software controller (mba_sc) - * - * mba_sc is a feedback loop where we periodically read MBM counters and - * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so - * that: - * - * current bandwidth(cur_bw) < user specified bandwidth(user_bw) - * - * This uses the MBM counters to measure the bandwidth and MBA throttle - * MSRs to control the bandwidth for a particular rdtgrp. It builds on the - * fact that resctrl rdtgroups have both monitoring and control. - * - * The frequency of the checks is 1s and we just tag along the MBM overflow - * timer. Having 1s interval makes the calculation of bandwidth simpler. - * - * Although MBA's goal is to restrict the bandwidth to a maximum, there may - * be a need to increase the bandwidth to avoid unnecessarily restricting - * the L2 <-> L3 traffic. - * - * Since MBA controls the L2 external bandwidth where as MBM measures the - * L3 external bandwidth the following sequence could lead to such a - * situation. - * - * Consider an rdtgroup which had high L3 <-> memory traffic in initial - * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but - * after some time rdtgroup has mostly L2 <-> L3 traffic. - * - * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its - * throttle MSRs already have low percentage values. To avoid - * unnecessarily restricting such rdtgroups, we also increase the bandwidth. - */ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +int __init rdt_get_mon_l3_config(struct rdt_resource *r) { - u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; - struct mbm_state *pmbm_data, *cmbm_data; - u32 cur_bw, delta_bw, user_bw; - struct rdt_resource *r_mba; - struct rdt_domain *dom_mba; - struct list_head *head; - struct rdtgroup *entry; - - if (!is_mbm_local_enabled()) - return; + unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + unsigned int threshold; + u32 eax, ebx, ecx, edx; - r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; - closid = rgrp->closid; - rmid = rgrp->mon.rmid; - pmbm_data = &dom_mbm->mbm_local[rmid]; + snc_nodes_per_l3_cache = snc_get_config(); - dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba); - if (!dom_mba) { - pr_warn_once("Failure to get domain for MBA update\n"); - return; - } + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; + r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; + hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; - cur_bw = pmbm_data->prev_bw; - user_bw = dom_mba->mbps_val[closid]; - delta_bw = pmbm_data->delta_bw; - cur_msr_val = dom_mba->ctrl_val[closid]; + if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) + hw_res->mbm_width += mbm_offset; + else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) + pr_warn("Ignoring impossible MBM counter offset\n"); /* - * For Ctrl groups read data from child monitor groups. + * A reasonable upper limit on the max threshold is the number + * of lines tagged per RMID if all RMIDs have the same number of + * lines tagged in the LLC. 
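+ * That is, the upper limit computed below is
+ * resctrl_rmid_realloc_limit / r->mon.num_rmid.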
+ * + * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - head = &rgrp->mon.crdtgrp_list; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cur_bw += cmbm_data->prev_bw; - delta_bw += cmbm_data->delta_bw; - } + threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid; /* - * Scale up/down the bandwidth linearly for the ctrl group. The - * bandwidth step is the bandwidth granularity specified by the - * hardware. - * - * The delta_bw is used when increasing the bandwidth so that we - * dont alternately increase and decrease the control values - * continuously. - * - * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if - * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep - * switching between 90 and 110 continuously if we only check - * cur_bw < user_bw. + * Because num_rmid may not be a power of two, round the value + * to the nearest multiple of hw_res->mon_scale so it matches a + * value the hardware will measure. mon_scale may not be a power of 2. */ - if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { - new_msr_val = cur_msr_val - r_mba->membw.bw_gran; - } else if (cur_msr_val < MAX_MBA_BW && - (user_bw > (cur_bw + delta_bw))) { - new_msr_val = cur_msr_val + r_mba->membw.bw_gran; - } else { - return; - } + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - cur_msr = r_mba->msr_base + closid; - wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); - dom_mba->ctrl_val[closid] = new_msr_val; + if (rdt_cpu_has(X86_FEATURE_BMEC) || rdt_cpu_has(X86_FEATURE_ABMC)) { + /* Detect list of bandwidth sources that can be tracked */ + cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); + r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + } /* - * Delta values are updated dynamically package wise for each - * rdtgrp every time the throttle MSR changes value. - * - * This is because (1)the increase in bandwidth is not perfectly - * linear and only "approximately" linear even when the hardware - * says it is linear.(2)Also since MBA is a core specific - * mechanism, the delta values vary based on number of cores used - * by the rdtgrp. + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. */ - pmbm_data->delta_comp = true; - list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; - cmbm_data->delta_comp = true; + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { + r->mon.mbm_cntr_assignable = true; + cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); + r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; + hw_res->mbm_cntr_assign_enabled = true; } -} -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) -{ - struct rmid_read rr; - - rr.first = false; - rr.r = r; - rr.d = d; + r->mon_capable = true; - /* - * This is protected from concurrent reads from user - * as both the user and we hold the global mutex. 
- */ - if (is_mbm_total_enabled()) { - rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; - __mon_event_count(rmid, &rr); - } - if (is_mbm_local_enabled()) { - rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; - __mon_event_count(rmid, &rr); - - /* - * Call the MBA software controller only for the - * control groups and when user has enabled - * the software controller explicitly. - */ - if (is_mba_sc(NULL)) - mbm_bw_count(rmid, &rr); - } + return 0; } -/* - * Handler to scan the limbo list and move the RMIDs - * to free list whose occupancy < threshold_occupancy. - */ -void cqm_handle_limbo(struct work_struct *work) +void __init intel_rdt_mbm_apply_quirk(void) { - unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - int cpu = smp_processor_id(); - struct rdt_resource *r; - struct rdt_domain *d; - - mutex_lock(&rdtgroup_mutex); - - r = &rdt_resources_all[RDT_RESOURCE_L3]; - d = container_of(work, struct rdt_domain, cqm_limbo.work); - - __check_limbo(d, false); + int cf_index; - if (has_busy_rmid(r, d)) - schedule_delayed_work_on(cpu, &d->cqm_limbo, delay); + cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1; + if (cf_index >= ARRAY_SIZE(mbm_cf_table)) { + pr_info("No MBM correction factor available\n"); + return; + } - mutex_unlock(&rdtgroup_mutex); + mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold; + mbm_cf = mbm_cf_table[cf_index].cf; } -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms) +static void resctrl_abmc_set_one_amd(void *arg) { - unsigned long delay = msecs_to_jiffies(delay_ms); - int cpu; - - cpu = cpumask_any(&dom->cpu_mask); - dom->cqm_work_cpu = cpu; + bool *enable = arg; - schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); } -void mbm_handle_overflow(struct work_struct *work) +/* + * ABMC enable/disable requires update of L3_QOS_EXT_CFG MSR on all the CPUs + * associated with all monitor domains. 
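+ * Toggling ABMC also resets the assignable hardware counters, so the
+ * saved MBM state of each domain is cleared afterwards via
+ * resctrl_arch_reset_rmid_all().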
+ */ +static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) { - unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); - struct rdtgroup *prgrp, *crgrp; - int cpu = smp_processor_id(); - struct list_head *head; - struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; - mutex_lock(&rdtgroup_mutex); + lockdep_assert_cpus_held(); - if (!static_branch_likely(&rdt_mon_enable_key)) - goto out_unlock; - - r = &rdt_resources_all[RDT_RESOURCE_L3]; - d = container_of(work, struct rdt_domain, mbm_over.work); - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(r, d, prgrp->mon.rmid); - - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(r, d, crgrp->mon.rmid); - - if (is_mba_sc(NULL)) - update_mba_bw(prgrp, d); + list_for_each_entry(d, &r->mon_domains, hdr.list) { + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd, + &enable, 1); + resctrl_arch_reset_rmid_all(r, d); } - - schedule_delayed_work_on(cpu, &d->mbm_over, delay); - -out_unlock: - mutex_unlock(&rdtgroup_mutex); } -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms) +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) { - unsigned long delay = msecs_to_jiffies(delay_ms); - int cpu; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - if (!static_branch_likely(&rdt_mon_enable_key)) - return; - cpu = cpumask_any(&dom->cpu_mask); - dom->mbm_work_cpu = cpu; - schedule_delayed_work_on(cpu, &dom->mbm_over, delay); -} - -static int dom_data_init(struct rdt_resource *r) -{ - struct rmid_entry *entry = NULL; - int i, nr_rmids; - - nr_rmids = r->num_rmid; - rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL); - if (!rmid_ptrs) - return -ENOMEM; - - for (i = 0; i < nr_rmids; i++) { - entry = &rmid_ptrs[i]; - INIT_LIST_HEAD(&entry->list); - - entry->rmid = i; - list_add_tail(&entry->list, &rmid_free_lru); + if (r->mon.mbm_cntr_assignable && + hw_res->mbm_cntr_assign_enabled != enable) { + _resctrl_abmc_enable(r, enable); + hw_res->mbm_cntr_assign_enabled = enable; } - /* - * RMID 0 is special and is always allocated. It's used for all - * tasks that are not monitored. - */ - entry = __rmid_entry(0); - list_del(&entry->list); - return 0; } -static struct mon_evt llc_occupancy_event = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, -}; +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled; +} -static struct mon_evt mbm_total_event = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, -}; +static void resctrl_abmc_config_one_amd(void *info) +{ + union l3_qos_abmc_cfg *abmc_cfg = info; -static struct mon_evt mbm_local_event = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, -}; + wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, abmc_cfg->full); +} /* - * Initialize the event list for the resource. - * - * Note that MBM events are also part of RDT_RESOURCE_L3 resource - * because as per the SDM the total and local memory bandwidth - * are enumerated as part of L3 monitoring. + * Send an IPI to the domain to assign the counter to RMID, event pair. 
*/ -static void l3_mon_evt_init(struct rdt_resource *r) -{ - INIT_LIST_HEAD(&r->evt_list); - - if (is_llc_occupancy_enabled()) - list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (is_mbm_total_enabled()) - list_add_tail(&mbm_total_event.list, &r->evt_list); - if (is_mbm_local_enabled()) - list_add_tail(&mbm_local_event.list, &r->evt_list); -} - -int rdt_get_mon_l3_config(struct rdt_resource *r) +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) { - unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; - unsigned int cl_size = boot_cpu_data.x86_cache_size; - int ret; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + union l3_qos_abmc_cfg abmc_cfg = { 0 }; + struct arch_mbm_state *am; - r->mon_scale = boot_cpu_data.x86_cache_occ_scale; - r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; - r->mbm_width = MBM_CNTR_WIDTH_BASE; + abmc_cfg.split.cfg_en = 1; + abmc_cfg.split.cntr_en = assign ? 1 : 0; + abmc_cfg.split.cntr_id = cntr_id; + abmc_cfg.split.bw_src = rmid; + if (assign) + abmc_cfg.split.bw_type = resctrl_get_mon_evt_cfg(evtid); - if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) - r->mbm_width += mbm_offset; - else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) - pr_warn("Ignoring impossible MBM counter offset\n"); + smp_call_function_any(&d->hdr.cpu_mask, resctrl_abmc_config_one_amd, &abmc_cfg, 1); /* - * A reasonable upper limit on the max threshold is the number - * of lines tagged per RMID if all RMIDs have the same number of - * lines tagged in the LLC. - * - * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. + * The hardware counter is reset (because cfg_en == 1) so there is no + * need to record initial non-zero counts. 
*/ - resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; - - /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - resctrl_cqm_threshold /= r->mon_scale; - - ret = dom_data_init(r); - if (ret) - return ret; - - l3_mon_evt_init(r); - - r->mon_capable = true; - r->mon_enabled = true; - - return 0; + am = get_arch_mbm_state(hw_dom, rmid, evtid); + if (am) + memset(am, 0, sizeof(*am)); } -void __init intel_rdt_mbm_apply_quirk(void) +void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r) { - int cf_index; - - cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1; - if (cf_index >= ARRAY_SIZE(mbm_cf_table)) { - pr_info("No MBM correction factor available\n"); - return; - } + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold; - mbm_cf = mbm_cf_table[cf_index].cf; + resctrl_abmc_set_one_amd(&hw_res->mbm_cntr_assign_enabled); } diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 2207916cae65..de580eca3363 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -11,27 +11,22 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/cacheinfo.h> +#include <linux/cacheflush.h> #include <linux/cpu.h> -#include <linux/cpumask.h> -#include <linux/debugfs.h> -#include <linux/kthread.h> -#include <linux/mman.h> #include <linux/perf_event.h> #include <linux/pm_qos.h> -#include <linux/slab.h> -#include <linux/uaccess.h> +#include <linux/resctrl.h> -#include <asm/cacheflush.h> -#include <asm/intel-family.h> -#include <asm/resctrl.h> +#include <asm/cpu_device_id.h> #include <asm/perf_event.h> +#include <asm/msr.h> #include "../../events/perf_event.h" /* For X86_CONFIG() */ #include "internal.h" #define CREATE_TRACE_POINTS -#include "pseudo_lock_event.h" + +#include "pseudo_lock_trace.h" /* * The bits needed to disable hardware prefetching varies based on the @@ -39,16 +34,9 @@ */ static u64 prefetch_disable_bits; -/* - * Major number assigned to and shared by all devices exposing - * pseudo-locked regions. - */ -static unsigned int pseudo_lock_major; -static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0); -static struct class *pseudo_lock_class; - /** - * get_prefetch_disable_bits - prefetch disable bits of supported platforms + * resctrl_arch_get_prefetch_disable_bits - prefetch disable bits of supported + * platforms * @void: It takes no parameters. * * Capture the list of platforms that have been validated to support @@ -62,20 +50,22 @@ static struct class *pseudo_lock_class; * in the SDM. * * When adding a platform here also add support for its cache events to - * measure_cycles_perf_fn() + * resctrl_arch_measure_l*_residency() * * Return: * If platform is supported, the bits to disable hardware prefetchers, 0 * if platform is not supported. 
*/ -static u64 get_prefetch_disable_bits(void) +u64 resctrl_arch_get_prefetch_disable_bits(void) { + prefetch_disable_bits = 0; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) return 0; - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_BROADWELL_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_BROADWELL_X: /* * SDM defines bits of MSR_MISC_FEATURE_CONTROL register * as: @@ -85,9 +75,10 @@ static u64 get_prefetch_disable_bits(void) * 3 DCU IP Prefetcher Disable (R/W) * 63:4 Reserved */ - return 0xF; - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + prefetch_disable_bits = 0xF; + break; + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: /* * SDM defines bits of MSR_MISC_FEATURE_CONTROL register * as: @@ -96,308 +87,16 @@ static u64 get_prefetch_disable_bits(void) * 2 DCU Hardware Prefetcher Disable (R/W) * 63:3 Reserved */ - return 0x5; - } - - return 0; -} - -/** - * pseudo_lock_minor_get - Obtain available minor number - * @minor: Pointer to where new minor number will be stored - * - * A bitmask is used to track available minor numbers. Here the next free - * minor number is marked as unavailable and returned. - * - * Return: 0 on success, <0 on failure. - */ -static int pseudo_lock_minor_get(unsigned int *minor) -{ - unsigned long first_bit; - - first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS); - - if (first_bit == MINORBITS) - return -ENOSPC; - - __clear_bit(first_bit, &pseudo_lock_minor_avail); - *minor = first_bit; - - return 0; -} - -/** - * pseudo_lock_minor_release - Return minor number to available - * @minor: The minor number made available - */ -static void pseudo_lock_minor_release(unsigned int minor) -{ - __set_bit(minor, &pseudo_lock_minor_avail); -} - -/** - * region_find_by_minor - Locate a pseudo-lock region by inode minor number - * @minor: The minor number of the device representing pseudo-locked region - * - * When the character device is accessed we need to determine which - * pseudo-locked region it belongs to. This is done by matching the minor - * number of the device to the pseudo-locked region it belongs. - * - * Minor numbers are assigned at the time a pseudo-locked region is associated - * with a cache instance. - * - * Return: On success return pointer to resource group owning the pseudo-locked - * region, NULL on failure. - */ -static struct rdtgroup *region_find_by_minor(unsigned int minor) -{ - struct rdtgroup *rdtgrp, *rdtgrp_match = NULL; - - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (rdtgrp->plr && rdtgrp->plr->minor == minor) { - rdtgrp_match = rdtgrp; - break; - } - } - return rdtgrp_match; -} - -/** - * struct pseudo_lock_pm_req - A power management QoS request list entry - * @list: Entry within the @pm_reqs list for a pseudo-locked region - * @req: PM QoS request - */ -struct pseudo_lock_pm_req { - struct list_head list; - struct dev_pm_qos_request req; -}; - -static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr) -{ - struct pseudo_lock_pm_req *pm_req, *next; - - list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) { - dev_pm_qos_remove_request(&pm_req->req); - list_del(&pm_req->list); - kfree(pm_req); - } -} - -/** - * pseudo_lock_cstates_constrain - Restrict cores from entering C6 - * @plr: Pseudo-locked region - * - * To prevent the cache from being affected by power management entering - * C6 has to be avoided. 
This is accomplished by requesting a latency - * requirement lower than lowest C6 exit latency of all supported - * platforms as found in the cpuidle state tables in the intel_idle driver. - * At this time it is possible to do so with a single latency requirement - * for all supported platforms. - * - * Since Goldmont is supported, which is affected by X86_BUG_MONITOR, - * the ACPI latencies need to be considered while keeping in mind that C2 - * may be set to map to deeper sleep states. In this case the latency - * requirement needs to prevent entering C2 also. - * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) -{ - struct pseudo_lock_pm_req *pm_req; - int cpu; - int ret; - - for_each_cpu(cpu, &plr->d->cpu_mask) { - pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); - if (!pm_req) { - rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); - ret = -ENOMEM; - goto out_err; - } - ret = dev_pm_qos_add_request(get_cpu_device(cpu), - &pm_req->req, - DEV_PM_QOS_RESUME_LATENCY, - 30); - if (ret < 0) { - rdt_last_cmd_printf("Failed to add latency req CPU%d\n", - cpu); - kfree(pm_req); - ret = -1; - goto out_err; - } - list_add(&pm_req->list, &plr->pm_reqs); - } - - return 0; - -out_err: - pseudo_lock_cstates_relax(plr); - return ret; -} - -/** - * pseudo_lock_region_clear - Reset pseudo-lock region data - * @plr: pseudo-lock region - * - * All content of the pseudo-locked region is reset - any memory allocated - * freed. - * - * Return: void - */ -static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) -{ - plr->size = 0; - plr->line_size = 0; - kfree(plr->kmem); - plr->kmem = NULL; - plr->r = NULL; - if (plr->d) - plr->d->plr = NULL; - plr->d = NULL; - plr->cbm = 0; - plr->debugfs_dir = NULL; -} - -/** - * pseudo_lock_region_init - Initialize pseudo-lock region information - * @plr: pseudo-lock region - * - * Called after user provided a schemata to be pseudo-locked. From the - * schemata the &struct pseudo_lock_region is on entry already initialized - * with the resource, domain, and capacity bitmask. Here the information - * required for pseudo-locking is deduced from this data and &struct - * pseudo_lock_region initialized further. This information includes: - * - size in bytes of the region to be pseudo-locked - * - cache line size to know the stride with which data needs to be accessed - * to be pseudo-locked - * - a cpu associated with the cache instance on which the pseudo-locking - * flow can be executed - * - * Return: 0 on success, <0 on failure. Descriptive error will be written - * to last_cmd_status buffer. - */ -static int pseudo_lock_region_init(struct pseudo_lock_region *plr) -{ - struct cpu_cacheinfo *ci; - int ret; - int i; - - /* Pick the first cpu we find that is associated with the cache. 
*/ - plr->cpu = cpumask_first(&plr->d->cpu_mask); - - if (!cpu_online(plr->cpu)) { - rdt_last_cmd_printf("CPU %u associated with cache not online\n", - plr->cpu); - ret = -ENODEV; - goto out_region; - } - - ci = get_cpu_cacheinfo(plr->cpu); - - plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm); - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->r->cache_level) { - plr->line_size = ci->info_list[i].coherency_line_size; - return 0; - } - } - - ret = -1; - rdt_last_cmd_puts("Unable to determine cache line size\n"); -out_region: - pseudo_lock_region_clear(plr); - return ret; -} - -/** - * pseudo_lock_init - Initialize a pseudo-lock region - * @rdtgrp: resource group to which new pseudo-locked region will belong - * - * A pseudo-locked region is associated with a resource group. When this - * association is created the pseudo-locked region is initialized. The - * details of the pseudo-locked region are not known at this time so only - * allocation is done and association established. - * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_init(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr; - - plr = kzalloc(sizeof(*plr), GFP_KERNEL); - if (!plr) - return -ENOMEM; - - init_waitqueue_head(&plr->lock_thread_wq); - INIT_LIST_HEAD(&plr->pm_reqs); - rdtgrp->plr = plr; - return 0; -} - -/** - * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked - * @plr: pseudo-lock region - * - * Initialize the details required to set up the pseudo-locked region and - * allocate the contiguous memory that will be pseudo-locked to the cache. - * - * Return: 0 on success, <0 on failure. Descriptive error will be written - * to last_cmd_status buffer. - */ -static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr) -{ - int ret; - - ret = pseudo_lock_region_init(plr); - if (ret < 0) - return ret; - - /* - * We do not yet support contiguous regions larger than - * KMALLOC_MAX_SIZE. - */ - if (plr->size > KMALLOC_MAX_SIZE) { - rdt_last_cmd_puts("Requested region exceeds maximum size\n"); - ret = -E2BIG; - goto out_region; - } - - plr->kmem = kzalloc(plr->size, GFP_KERNEL); - if (!plr->kmem) { - rdt_last_cmd_puts("Unable to allocate memory\n"); - ret = -ENOMEM; - goto out_region; + prefetch_disable_bits = 0x5; + break; } - ret = 0; - goto out; -out_region: - pseudo_lock_region_clear(plr); -out: - return ret; -} - -/** - * pseudo_lock_free - Free a pseudo-locked region - * @rdtgrp: resource group to which pseudo-locked region belonged - * - * The pseudo-locked region's resources have already been released, or not - * yet created at this point. Now it can be freed and disassociated from the - * resource group. - * - * Return: void - */ -static void pseudo_lock_free(struct rdtgroup *rdtgrp) -{ - pseudo_lock_region_clear(rdtgrp->plr); - kfree(rdtgrp->plr); - rdtgrp->plr = NULL; + return prefetch_disable_bits; } /** - * pseudo_lock_fn - Load kernel memory into cache - * @_rdtgrp: resource group to which pseudo-lock region belongs + * resctrl_arch_pseudo_lock_fn - Load kernel memory into cache + * @_plr: the pseudo-lock region descriptor * * This is the core pseudo-locking flow. * @@ -414,12 +113,12 @@ static void pseudo_lock_free(struct rdtgroup *rdtgrp) * * Return: 0. Waiter on waitqueue will be woken on completion. 
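Illustration only, not part of this patch: the waiter in this return
contract is the thread that spawned the locking kthread. Based on the
creation path further down (rdtgroup_pseudo_lock_create()), with the
function name as renamed by this patch, the pairing is roughly:

	plr->thread_done = 0;
	thread = kthread_create_on_node(resctrl_arch_pseudo_lock_fn, plr,
					cpu_to_node(plr->cpu),
					"pseudo_lock/%u", plr->cpu);
	kthread_bind(thread, plr->cpu);
	wake_up_process(thread);
	wait_event_interruptible(plr->lock_thread_wq, plr->thread_done == 1);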
*/ -static int pseudo_lock_fn(void *_rdtgrp) +int resctrl_arch_pseudo_lock_fn(void *_plr) { - struct rdtgroup *rdtgrp = _rdtgrp; - struct pseudo_lock_region *plr = rdtgrp->plr; + struct pseudo_lock_region *plr = _plr; u32 rmid_p, closid_p; unsigned long i; + u64 saved_msr; #ifdef CONFIG_KASAN /* * The registers used for local register variables are also used @@ -446,7 +145,7 @@ static int pseudo_lock_fn(void *_rdtgrp) * increase likelihood that allocated cache portion will be filled * with associated memory. */ - native_wbinvd(); + wbinvd(); /* * Always called with interrupts enabled. By disabling interrupts @@ -463,7 +162,8 @@ static int pseudo_lock_fn(void *_rdtgrp) * the buffer and evict pseudo-locked memory read earlier from the * cache. */ - __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); + saved_msr = native_rdmsrq(MSR_MISC_FEATURE_CONTROL); + native_wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits); closid_p = this_cpu_read(pqr_state.cur_closid); rmid_p = this_cpu_read(pqr_state.cur_rmid); mem_r = plr->kmem; @@ -475,7 +175,8 @@ static int pseudo_lock_fn(void *_rdtgrp) * pseudo-locked followed by reading of kernel memory to load it * into the cache. */ - __wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid); + native_wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, plr->closid); + /* * Cache was flushed earlier. Now access kernel memory to read it * into cache region associated with just activated plr->closid. @@ -511,10 +212,10 @@ static int pseudo_lock_fn(void *_rdtgrp) * Critical section end: restore closid with capacity bitmask that * does not overlap with pseudo-locked region. */ - __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p); + native_wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p); /* Re-enable the hardware prefetcher(s) */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsrq(MSR_MISC_FEATURE_CONTROL, saved_msr); local_irq_enable(); plr->thread_done = 1; @@ -523,339 +224,8 @@ static int pseudo_lock_fn(void *_rdtgrp) } /** - * rdtgroup_monitor_in_progress - Test if monitoring in progress - * @rdtgrp: resource group being queried - * - * Return: 1 if monitor groups have been created for this resource - * group, 0 otherwise. - */ -static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp) -{ - return !list_empty(&rdtgrp->mon.crdtgrp_list); -} - -/** - * rdtgroup_locksetup_user_restrict - Restrict user access to group - * @rdtgrp: resource group needing access restricted - * - * A resource group used for cache pseudo-locking cannot have cpus or tasks - * assigned to it. This is communicated to the user by restricting access - * to all the files that can be used to make such changes. - * - * Permissions restored with rdtgroup_locksetup_user_restore() - * - * Return: 0 on success, <0 on failure. If a failure occurs during the - * restriction of access an attempt will be made to restore permissions but - * the state of the mode of these files will be uncertain when a failure - * occurs. 
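Illustration only, not part of this patch: restricting a file here means
removing all user access to it via rdtgroup_kn_mode_restrict(), and the
restore path below reinstates 0777, so the two are used as a pair:

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");	/* drop access */
	...
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);	/* undo */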
- */ -static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp) -{ - int ret; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); - if (ret) - return ret; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); - if (ret) - goto err_tasks; - - ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); - if (ret) - goto err_cpus; - - if (rdt_mon_capable) { - ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups"); - if (ret) - goto err_cpus_list; - } - - ret = 0; - goto out; - -err_cpus_list: - rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); -err_cpus: - rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); -err_tasks: - rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); -out: - return ret; -} - -/** - * rdtgroup_locksetup_user_restore - Restore user access to group - * @rdtgrp: resource group needing access restored - * - * Restore all file access previously removed using - * rdtgroup_locksetup_user_restrict() - * - * Return: 0 on success, <0 on failure. If a failure occurs during the - * restoration of access an attempt will be made to restrict permissions - * again but the state of the mode of these files will be uncertain when - * a failure occurs. - */ -static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp) -{ - int ret; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777); - if (ret) - return ret; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777); - if (ret) - goto err_tasks; - - ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777); - if (ret) - goto err_cpus; - - if (rdt_mon_capable) { - ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777); - if (ret) - goto err_cpus_list; - } - - ret = 0; - goto out; - -err_cpus_list: - rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list"); -err_cpus: - rdtgroup_kn_mode_restrict(rdtgrp, "cpus"); -err_tasks: - rdtgroup_kn_mode_restrict(rdtgrp, "tasks"); -out: - return ret; -} - -/** - * rdtgroup_locksetup_enter - Resource group enters locksetup mode - * @rdtgrp: resource group requested to enter locksetup mode - * - * A resource group enters locksetup mode to reflect that it would be used - * to represent a pseudo-locked region and is in the process of being set - * up to do so. A resource group used for a pseudo-locked region would - * lose the closid associated with it so we cannot allow it to have any - * tasks or cpus assigned nor permit tasks or cpus to be assigned in the - * future. Monitoring of a pseudo-locked region is not allowed either. - * - * The above and more restrictions on a pseudo-locked region are checked - * for and enforced before the resource group enters the locksetup mode. - * - * Returns: 0 if the resource group successfully entered locksetup mode, <0 - * on failure. On failure the last_cmd_status buffer is updated with text to - * communicate details of failure to the user. - */ -int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) -{ - int ret; - - /* - * The default resource group can neither be removed nor lose the - * default closid associated with it. - */ - if (rdtgrp == &rdtgroup_default) { - rdt_last_cmd_puts("Cannot pseudo-lock default group\n"); - return -EINVAL; - } - - /* - * Cache Pseudo-locking not supported when CDP is enabled. - * - * Some things to consider if you would like to enable this - * support (using L3 CDP as example): - * - When CDP is enabled two separate resources are exposed, - * L3DATA and L3CODE, but they are actually on the same cache. - * The implication for pseudo-locking is that if a - * pseudo-locked region is created on a domain of one - * resource (eg. 
L3CODE), then a pseudo-locked region cannot - * be created on that same domain of the other resource - * (eg. L3DATA). This is because the creation of a - * pseudo-locked region involves a call to wbinvd that will - * affect all cache allocations on particular domain. - * - Considering the previous, it may be possible to only - * expose one of the CDP resources to pseudo-locking and - * hide the other. For example, we could consider to only - * expose L3DATA and since the L3 cache is unified it is - * still possible to place instructions there are execute it. - * - If only one region is exposed to pseudo-locking we should - * still keep in mind that availability of a portion of cache - * for pseudo-locking should take into account both resources. - * Similarly, if a pseudo-locked region is created in one - * resource, the portion of cache used by it should be made - * unavailable to all future allocations from both resources. - */ - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled || - rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) { - rdt_last_cmd_puts("CDP enabled\n"); - return -EINVAL; - } - - /* - * Not knowing the bits to disable prefetching implies that this - * platform does not support Cache Pseudo-Locking. - */ - prefetch_disable_bits = get_prefetch_disable_bits(); - if (prefetch_disable_bits == 0) { - rdt_last_cmd_puts("Pseudo-locking not supported\n"); - return -EINVAL; - } - - if (rdtgroup_monitor_in_progress(rdtgrp)) { - rdt_last_cmd_puts("Monitoring in progress\n"); - return -EINVAL; - } - - if (rdtgroup_tasks_assigned(rdtgrp)) { - rdt_last_cmd_puts("Tasks assigned to resource group\n"); - return -EINVAL; - } - - if (!cpumask_empty(&rdtgrp->cpu_mask)) { - rdt_last_cmd_puts("CPUs assigned to resource group\n"); - return -EINVAL; - } - - if (rdtgroup_locksetup_user_restrict(rdtgrp)) { - rdt_last_cmd_puts("Unable to modify resctrl permissions\n"); - return -EIO; - } - - ret = pseudo_lock_init(rdtgrp); - if (ret) { - rdt_last_cmd_puts("Unable to init pseudo-lock region\n"); - goto out_release; - } - - /* - * If this system is capable of monitoring a rmid would have been - * allocated when the control group was created. This is not needed - * anymore when this group would be used for pseudo-locking. This - * is safe to call on platforms not capable of monitoring. - */ - free_rmid(rdtgrp->mon.rmid); - - ret = 0; - goto out; - -out_release: - rdtgroup_locksetup_user_restore(rdtgrp); -out: - return ret; -} - -/** - * rdtgroup_locksetup_exit - resource group exist locksetup mode - * @rdtgrp: resource group - * - * When a resource group exits locksetup mode the earlier restrictions are - * lifted. - * - * Return: 0 on success, <0 on failure - */ -int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) -{ - int ret; - - if (rdt_mon_capable) { - ret = alloc_rmid(); - if (ret < 0) { - rdt_last_cmd_puts("Out of RMIDs\n"); - return ret; - } - rdtgrp->mon.rmid = ret; - } - - ret = rdtgroup_locksetup_user_restore(rdtgrp); - if (ret) { - free_rmid(rdtgrp->mon.rmid); - return ret; - } - - pseudo_lock_free(rdtgrp); - return 0; -} - -/** - * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked - * @d: RDT domain - * @cbm: CBM to test - * - * @d represents a cache instance and @cbm a capacity bitmask that is - * considered for it. Determine if @cbm overlaps with any existing - * pseudo-locked region on @d. - * - * @cbm is unsigned long, even if only 32 bits are used, to make the - * bitmap functions work correctly. 
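Illustration only, not part of this patch: the bitmap helpers operate on
unsigned long, so the stored 32-bit CBM is widened before testing, as in
the function body below:

	unsigned long cbm_b = d->plr->cbm;	/* u32, widened */

	if (bitmap_intersects(&cbm, &cbm_b, d->plr->r->cache.cbm_len))
		return true;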
- * - * Return: true if @cbm overlaps with pseudo-locked region on @d, false - * otherwise. - */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) -{ - unsigned int cbm_len; - unsigned long cbm_b; - - if (d->plr) { - cbm_len = d->plr->r->cache.cbm_len; - cbm_b = d->plr->cbm; - if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) - return true; - } - return false; -} - -/** - * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy - * @d: RDT domain under test - * - * The setup of a pseudo-locked region affects all cache instances within - * the hierarchy of the region. It is thus essential to know if any - * pseudo-locked regions exist within a cache hierarchy to prevent any - * attempts to create new pseudo-locked regions in the same hierarchy. - * - * Return: true if a pseudo-locked region exists in the hierarchy of @d or - * if it is not possible to test due to memory allocation issue, - * false otherwise. - */ -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) -{ - cpumask_var_t cpu_with_psl; - struct rdt_resource *r; - struct rdt_domain *d_i; - bool ret = false; - - if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL)) - return true; - - /* - * First determine which cpus have pseudo-locked regions - * associated with them. - */ - for_each_alloc_enabled_rdt_resource(r) { - list_for_each_entry(d_i, &r->domains, list) { - if (d_i->plr) - cpumask_or(cpu_with_psl, cpu_with_psl, - &d_i->cpu_mask); - } - } - - /* - * Next test if new pseudo-locked region would intersect with - * existing region. - */ - if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) - ret = true; - - free_cpumask_var(cpu_with_psl); - return ret; -} - -/** - * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory + * resctrl_arch_measure_cycles_lat_fn - Measure cycle latency to read + * pseudo-locked memory * @_plr: pseudo-lock region to measure * * There is no deterministic way to test if a memory region is cached. One @@ -868,9 +238,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * * Return: 0. Waiter on waitqueue will be woken on completion. */ -static int measure_cycles_lat_fn(void *_plr) +int resctrl_arch_measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; + u32 saved_low, saved_high; unsigned long i; u64 start, end; void *mem_r; @@ -879,7 +250,8 @@ static int measure_cycles_lat_fn(void *_plr) /* * Disable hardware prefetchers. */ - wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); + wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits); mem_r = READ_ONCE(plr->kmem); /* * Dummy execute of the time measurement to load the needed @@ -895,7 +267,7 @@ static int measure_cycles_lat_fn(void *_plr) end = rdtsc_ordered(); trace_pseudo_lock_mem_latency((u32)(end - start)); } - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); plr->thread_done = 1; wake_up_interruptible(&plr->lock_thread_wq); @@ -940,6 +312,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0; struct perf_event *miss_event, *hit_event; int hit_pmcnum, miss_pmcnum; + u32 saved_low, saved_high; unsigned int line_size; unsigned int size; unsigned long i; @@ -973,7 +346,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, /* * Disable hardware prefetchers. 
*/ - wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); + wrmsrq(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits); /* Initialize rest of local variables */ /* @@ -991,8 +365,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, * used in L1 cache, second to capture accurate value that does not * include cache misses incurred because of instruction loads. */ - rdpmcl(hit_pmcnum, hits_before); - rdpmcl(miss_pmcnum, miss_before); + hits_before = rdpmc(hit_pmcnum); + miss_before = rdpmc(miss_pmcnum); /* * From SDM: Performing back-to-back fast reads are not guaranteed * to be monotonic. @@ -1000,8 +374,8 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, * before proceeding. */ rmb(); - rdpmcl(hit_pmcnum, hits_before); - rdpmcl(miss_pmcnum, miss_before); + hits_before = rdpmc(hit_pmcnum); + miss_before = rdpmc(miss_pmcnum); /* * Use LFENCE to ensure all previous instructions are retired * before proceeding. @@ -1023,15 +397,15 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, * before proceeding. */ rmb(); - rdpmcl(hit_pmcnum, hits_after); - rdpmcl(miss_pmcnum, miss_after); + hits_after = rdpmc(hit_pmcnum); + miss_after = rdpmc(miss_pmcnum); /* * Use LFENCE to ensure all previous instructions are retired * before proceeding. */ rmb(); /* Re-enable hardware prefetchers */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); out_hit: perf_event_release_kernel(hit_event); @@ -1048,7 +422,7 @@ out: return 0; } -static int measure_l2_residency(void *_plr) +int resctrl_arch_measure_l2_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1061,9 +435,9 @@ static int measure_l2_residency(void *_plr) * L2_HIT 02H * L2_MISS 10H */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + switch (boot_cpu_data.x86_vfm) { + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: perf_miss_attr.config = X86_CONFIG(.event = 0xd1, .umask = 0x10); perf_hit_attr.config = X86_CONFIG(.event = 0xd1, @@ -1086,7 +460,7 @@ out: return 0; } -static int measure_l3_residency(void *_plr) +int resctrl_arch_measure_l3_residency(void *_plr) { struct pseudo_lock_region *plr = _plr; struct residency_counts counts = {0}; @@ -1100,8 +474,8 @@ static int measure_l3_residency(void *_plr) * MISS 41H */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_BROADWELL_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_BROADWELL_X: /* On BDW the hit event counts references, not hits */ perf_hit_attr.config = X86_CONFIG(.event = 0x2e, .umask = 0x4f); @@ -1119,7 +493,7 @@ static int measure_l3_residency(void *_plr) */ counts.miss_after -= counts.miss_before; - if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) { + if (boot_cpu_data.x86_vfm == INTEL_BROADWELL_X) { /* * On BDW references and misses are counted, need to adjust. * Sometimes the "hits" counter is a bit more than the @@ -1141,454 +515,3 @@ out: wake_up_interruptible(&plr->lock_thread_wq); return 0; } - -/** - * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region - * @rdtgrp: Resource group to which the pseudo-locked region belongs. - * @sel: Selector of which measurement to perform on a pseudo-locked region. 
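For reference, not part of this patch: selector 1 triggers the memory
latency measurement, 2 the L2 residency counts and 3 the L3 residency
counts (see the thread selection in the function body below). User space
drives this by writing the selector to the region's debugfs file, e.g.
"echo 2 > /sys/kernel/debug/resctrl/<group>/pseudo_lock_measure", with the
path assumed from the debugfs_create_dir() call further down.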
- * - * The measurement of latency to access a pseudo-locked region should be - * done from a cpu that is associated with that pseudo-locked region. - * Determine which cpu is associated with this region and start a thread on - * that cpu to perform the measurement, wait for that thread to complete. - * - * Return: 0 on success, <0 on failure - */ -static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - struct task_struct *thread; - unsigned int cpu; - int ret = -1; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - if (rdtgrp->flags & RDT_DELETED) { - ret = -ENODEV; - goto out; - } - - if (!plr->d) { - ret = -ENODEV; - goto out; - } - - plr->thread_done = 0; - cpu = cpumask_first(&plr->d->cpu_mask); - if (!cpu_online(cpu)) { - ret = -ENODEV; - goto out; - } - - plr->cpu = cpu; - - if (sel == 1) - thread = kthread_create_on_node(measure_cycles_lat_fn, plr, - cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else if (sel == 2) - thread = kthread_create_on_node(measure_l2_residency, plr, - cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else if (sel == 3) - thread = kthread_create_on_node(measure_l3_residency, plr, - cpu_to_node(cpu), - "pseudo_lock_measure/%u", - cpu); - else - goto out; - - if (IS_ERR(thread)) { - ret = PTR_ERR(thread); - goto out; - } - kthread_bind(thread, cpu); - wake_up_process(thread); - - ret = wait_event_interruptible(plr->lock_thread_wq, - plr->thread_done == 1); - if (ret < 0) - goto out; - - ret = 0; - -out: - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return ret; -} - -static ssize_t pseudo_lock_measure_trigger(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct rdtgroup *rdtgrp = file->private_data; - size_t buf_size; - char buf[32]; - int ret; - int sel; - - buf_size = min(count, (sizeof(buf) - 1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - ret = kstrtoint(buf, 10, &sel); - if (ret == 0) { - if (sel != 1 && sel != 2 && sel != 3) - return -EINVAL; - ret = debugfs_file_get(file->f_path.dentry); - if (ret) - return ret; - ret = pseudo_lock_measure_cycles(rdtgrp, sel); - if (ret == 0) - ret = count; - debugfs_file_put(file->f_path.dentry); - } - - return ret; -} - -static const struct file_operations pseudo_measure_fops = { - .write = pseudo_lock_measure_trigger, - .open = simple_open, - .llseek = default_llseek, -}; - -/** - * rdtgroup_pseudo_lock_create - Create a pseudo-locked region - * @rdtgrp: resource group to which pseudo-lock region belongs - * - * Called when a resource group in the pseudo-locksetup mode receives a - * valid schemata that should be pseudo-locked. Since the resource group is - * in pseudo-locksetup mode the &struct pseudo_lock_region has already been - * allocated and initialized with the essential information. If a failure - * occurs the resource group remains in the pseudo-locksetup mode with the - * &struct pseudo_lock_region associated with it, but cleared from all - * information and ready for the user to re-attempt pseudo-locking by - * writing the schemata again. - * - * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0 - * on failure. Descriptive error will be written to last_cmd_status buffer. 
- */ -int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - struct task_struct *thread; - unsigned int new_minor; - struct device *dev; - int ret; - - ret = pseudo_lock_region_alloc(plr); - if (ret < 0) - return ret; - - ret = pseudo_lock_cstates_constrain(plr); - if (ret < 0) { - ret = -EINVAL; - goto out_region; - } - - plr->thread_done = 0; - - thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp, - cpu_to_node(plr->cpu), - "pseudo_lock/%u", plr->cpu); - if (IS_ERR(thread)) { - ret = PTR_ERR(thread); - rdt_last_cmd_printf("Locking thread returned error %d\n", ret); - goto out_cstates; - } - - kthread_bind(thread, plr->cpu); - wake_up_process(thread); - - ret = wait_event_interruptible(plr->lock_thread_wq, - plr->thread_done == 1); - if (ret < 0) { - /* - * If the thread does not get on the CPU for whatever - * reason and the process which sets up the region is - * interrupted then this will leave the thread in runnable - * state and once it gets on the CPU it will dereference - * the cleared, but not freed, plr struct resulting in an - * empty pseudo-locking loop. - */ - rdt_last_cmd_puts("Locking thread interrupted\n"); - goto out_cstates; - } - - ret = pseudo_lock_minor_get(&new_minor); - if (ret < 0) { - rdt_last_cmd_puts("Unable to obtain a new minor number\n"); - goto out_cstates; - } - - /* - * Unlock access but do not release the reference. The - * pseudo-locked region will still be here on return. - * - * The mutex has to be released temporarily to avoid a potential - * deadlock with the mm->mmap_lock which is obtained in the - * device_create() and debugfs_create_dir() callpath below as well as - * before the mmap() callback is called. - */ - mutex_unlock(&rdtgroup_mutex); - - if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); - if (!IS_ERR_OR_NULL(plr->debugfs_dir)) - debugfs_create_file("pseudo_lock_measure", 0200, - plr->debugfs_dir, rdtgrp, - &pseudo_measure_fops); - } - - dev = device_create(pseudo_lock_class, NULL, - MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); - - mutex_lock(&rdtgroup_mutex); - - if (IS_ERR(dev)) { - ret = PTR_ERR(dev); - rdt_last_cmd_printf("Failed to create character device: %d\n", - ret); - goto out_debugfs; - } - - /* We released the mutex - check if group was removed while we did so */ - if (rdtgrp->flags & RDT_DELETED) { - ret = -ENODEV; - goto out_device; - } - - plr->minor = new_minor; - - rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED; - closid_free(rdtgrp->closid); - rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444); - rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444); - - ret = 0; - goto out; - -out_device: - device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor)); -out_debugfs: - debugfs_remove_recursive(plr->debugfs_dir); - pseudo_lock_minor_release(new_minor); -out_cstates: - pseudo_lock_cstates_relax(plr); -out_region: - pseudo_lock_region_clear(plr); -out: - return ret; -} - -/** - * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region - * @rdtgrp: resource group to which the pseudo-locked region belongs - * - * The removal of a pseudo-locked region can be initiated when the resource - * group is removed from user space via a "rmdir" from userspace or the - * unmount of the resctrl filesystem. On removal the resource group does - * not go back to pseudo-locksetup mode before it is removed, instead it is - * removed directly. 
There is thus asymmetry with the creation where the - * &struct pseudo_lock_region is removed here while it was not created in - * rdtgroup_pseudo_lock_create(). - * - * Return: void - */ -void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp) -{ - struct pseudo_lock_region *plr = rdtgrp->plr; - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - /* - * Default group cannot be a pseudo-locked region so we can - * free closid here. - */ - closid_free(rdtgrp->closid); - goto free; - } - - pseudo_lock_cstates_relax(plr); - debugfs_remove_recursive(rdtgrp->plr->debugfs_dir); - device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor)); - pseudo_lock_minor_release(plr->minor); - -free: - pseudo_lock_free(rdtgrp); -} - -static int pseudo_lock_dev_open(struct inode *inode, struct file *filp) -{ - struct rdtgroup *rdtgrp; - - mutex_lock(&rdtgroup_mutex); - - rdtgrp = region_find_by_minor(iminor(inode)); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - filp->private_data = rdtgrp; - atomic_inc(&rdtgrp->waitcount); - /* Perform a non-seekable open - llseek is not supported */ - filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - - mutex_unlock(&rdtgroup_mutex); - - return 0; -} - -static int pseudo_lock_dev_release(struct inode *inode, struct file *filp) -{ - struct rdtgroup *rdtgrp; - - mutex_lock(&rdtgroup_mutex); - rdtgrp = filp->private_data; - WARN_ON(!rdtgrp); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - filp->private_data = NULL; - atomic_dec(&rdtgrp->waitcount); - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static int pseudo_lock_dev_mremap(struct vm_area_struct *area) -{ - /* Not supported */ - return -EINVAL; -} - -static const struct vm_operations_struct pseudo_mmap_ops = { - .mremap = pseudo_lock_dev_mremap, -}; - -static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long vsize = vma->vm_end - vma->vm_start; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - struct pseudo_lock_region *plr; - struct rdtgroup *rdtgrp; - unsigned long physical; - unsigned long psize; - - mutex_lock(&rdtgroup_mutex); - - rdtgrp = filp->private_data; - WARN_ON(!rdtgrp); - if (!rdtgrp) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - plr = rdtgrp->plr; - - if (!plr->d) { - mutex_unlock(&rdtgroup_mutex); - return -ENODEV; - } - - /* - * Task is required to run with affinity to the cpus associated - * with the pseudo-locked region. If this is not the case the task - * may be scheduled elsewhere and invalidate entries in the - * pseudo-locked region. - */ - if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { - mutex_unlock(&rdtgroup_mutex); - return -EINVAL; - } - - physical = __pa(plr->kmem) >> PAGE_SHIFT; - psize = plr->size - off; - - if (off > plr->size) { - mutex_unlock(&rdtgroup_mutex); - return -ENOSPC; - } - - /* - * Ensure changes are carried directly to the memory being mapped, - * do not allow copy-on-write mapping. 
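Illustration only, user-space side; the group name "lock0" is an example
and the device path follows from pseudo_lock_devnode() below. The mapping
must be shared, since a private (copy-on-write) mapping is rejected by the
VM_SHARED check that follows:

	int fd = open("/dev/pseudo_lock/lock0", O_RDWR);
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);	/* MAP_PRIVATE fails -EINVAL */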
- */ - if (!(vma->vm_flags & VM_SHARED)) { - mutex_unlock(&rdtgroup_mutex); - return -EINVAL; - } - - if (vsize > psize) { - mutex_unlock(&rdtgroup_mutex); - return -ENOSPC; - } - - memset(plr->kmem + off, 0, vsize); - - if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff, - vsize, vma->vm_page_prot)) { - mutex_unlock(&rdtgroup_mutex); - return -EAGAIN; - } - vma->vm_ops = &pseudo_mmap_ops; - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static const struct file_operations pseudo_lock_dev_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = NULL, - .write = NULL, - .open = pseudo_lock_dev_open, - .release = pseudo_lock_dev_release, - .mmap = pseudo_lock_dev_mmap, -}; - -static char *pseudo_lock_devnode(struct device *dev, umode_t *mode) -{ - struct rdtgroup *rdtgrp; - - rdtgrp = dev_get_drvdata(dev); - if (mode) - *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); -} - -int rdt_pseudo_lock_init(void) -{ - int ret; - - ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops); - if (ret < 0) - return ret; - - pseudo_lock_major = ret; - - pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock"); - if (IS_ERR(pseudo_lock_class)) { - ret = PTR_ERR(pseudo_lock_class); - unregister_chrdev(pseudo_lock_major, "pseudo_lock"); - return ret; - } - - pseudo_lock_class->devnode = pseudo_lock_devnode; - return 0; -} - -void rdt_pseudo_lock_release(void) -{ - class_destroy(pseudo_lock_class); - pseudo_lock_class = NULL; - unregister_chrdev(pseudo_lock_major, "pseudo_lock"); - pseudo_lock_major = 0; -} diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h b/arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h index 428ebbd4270b..7c8aef08010f 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock_trace.h @@ -2,8 +2,8 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM resctrl -#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PSEUDO_LOCK_H +#if !defined(_X86_RESCTRL_PSEUDO_LOCK_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _X86_RESCTRL_PSEUDO_LOCK_TRACE_H #include <linux/tracepoint.h> @@ -35,9 +35,11 @@ TRACE_EVENT(pseudo_lock_l3, TP_printk("hits=%llu miss=%llu", __entry->l3_hits, __entry->l3_miss)); -#endif /* _TRACE_PSEUDO_LOCK_H */ +#endif /* _X86_RESCTRL_PSEUDO_LOCK_TRACE_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . 
-#define TRACE_INCLUDE_FILE pseudo_lock_event + +#define TRACE_INCLUDE_FILE pseudo_lock_trace + #include <trace/define_trace.h> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 01fd30e7829d..885026468440 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -12,13 +12,13 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/fs_parser.h> #include <linux/sysfs.h> #include <linux/kernfs.h> +#include <linux/resctrl.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/sched/signal.h> @@ -29,281 +29,28 @@ #include <uapi/linux/magic.h> -#include <asm/resctrl.h> +#include <asm/msr.h> #include "internal.h" DEFINE_STATIC_KEY_FALSE(rdt_enable_key); -DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); -DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); -static struct kernfs_root *rdt_root; -struct rdtgroup rdtgroup_default; -LIST_HEAD(rdt_all_groups); - -/* Kernel fs node for "info" directory under root */ -static struct kernfs_node *kn_info; - -/* Kernel fs node for "mon_groups" directory under root */ -static struct kernfs_node *kn_mongrp; - -/* Kernel fs node for "mon_data" directory under root */ -static struct kernfs_node *kn_mondata; - -static struct seq_buf last_cmd_status; -static char last_cmd_status_buf[512]; - -struct dentry *debugfs_resctrl; - -void rdt_last_cmd_clear(void) -{ - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_clear(&last_cmd_status); -} - -void rdt_last_cmd_puts(const char *s) -{ - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_puts(&last_cmd_status, s); -} - -void rdt_last_cmd_printf(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - lockdep_assert_held(&rdtgroup_mutex); - seq_buf_vprintf(&last_cmd_status, fmt, ap); - va_end(ap); -} - -/* - * Trivial allocator for CLOSIDs. Since h/w only supports a small number, - * we can keep a bitmap of free CLOSIDs in a single integer. - * - * Using a global CLOSID across all resources has some advantages and - * some drawbacks: - * + We can simply set "current->closid" to assign a task to a resource - * group. - * + Context switch code can avoid extra memory references deciding which - * CLOSID to load into the PQR_ASSOC MSR - * - We give up some options in configuring resource groups across multi-socket - * systems. - * - Our choices on how to configure each resource become progressively more - * limited as the number of resources grows. 
- */ -static int closid_free_map; -static int closid_free_map_len; - -int closids_supported(void) -{ - return closid_free_map_len; -} - -static void closid_init(void) -{ - struct rdt_resource *r; - int rdt_min_closid = 32; - - /* Compute rdt_min_closid across all resources */ - for_each_alloc_enabled_rdt_resource(r) - rdt_min_closid = min(rdt_min_closid, r->num_closid); - - closid_free_map = BIT_MASK(rdt_min_closid) - 1; - - /* CLOSID 0 is always reserved for the default group */ - closid_free_map &= ~1; - closid_free_map_len = rdt_min_closid; -} - -static int closid_alloc(void) -{ - u32 closid = ffs(closid_free_map); - - if (closid == 0) - return -ENOSPC; - closid--; - closid_free_map &= ~(1 << closid); - - return closid; -} - -void closid_free(int closid) -{ - closid_free_map |= 1 << closid; -} - -/** - * closid_allocated - test if provided closid is in use - * @closid: closid to be tested - * - * Return: true if @closid is currently associated with a resource group, - * false if @closid is free - */ -static bool closid_allocated(unsigned int closid) -{ - return (closid_free_map & (1 << closid)) == 0; -} - -/** - * rdtgroup_mode_by_closid - Return mode of resource group with closid - * @closid: closid if the resource group - * - * Each resource group is associated with a @closid. Here the mode - * of a resource group can be queried by searching for it using its closid. - * - * Return: mode as &enum rdtgrp_mode of resource group with closid @closid - */ -enum rdtgrp_mode rdtgroup_mode_by_closid(int closid) -{ - struct rdtgroup *rdtgrp; - - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (rdtgrp->closid == closid) - return rdtgrp->mode; - } - - return RDT_NUM_MODES; -} - -static const char * const rdt_mode_str[] = { - [RDT_MODE_SHAREABLE] = "shareable", - [RDT_MODE_EXCLUSIVE] = "exclusive", - [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup", - [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked", -}; - -/** - * rdtgroup_mode_str - Return the string representation of mode - * @mode: the resource group mode as &enum rdtgroup_mode - * - * Return: string representation of valid mode, "unknown" otherwise - */ -static const char *rdtgroup_mode_str(enum rdtgrp_mode mode) -{ - if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES) - return "unknown"; - - return rdt_mode_str[mode]; -} - -/* set uid and gid of rdtgroup dirs and files to that of the creator */ -static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) -{ - struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, - .ia_uid = current_fsuid(), - .ia_gid = current_fsgid(), }; - if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && - gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) - return 0; - - return kernfs_setattr(kn, &iattr); -} - -static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) -{ - struct kernfs_node *kn; - int ret; - - kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - 0, rft->kf_ops, rft, NULL, NULL); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - kernfs_remove(kn); - return ret; - } - - return 0; -} - -static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) -{ - struct kernfs_open_file *of = m->private; - struct rftype *rft = of->kn->priv; - - if (rft->seq_show) - return rft->seq_show(of, m, arg); - return 0; -} - -static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct rftype *rft = of->kn->priv; - - if (rft->write) - return rft->write(of, buf, nbytes, 
off); - - return -EINVAL; -} - -static const struct kernfs_ops rdtgroup_kf_single_ops = { - .atomic_write_len = PAGE_SIZE, - .write = rdtgroup_file_write, - .seq_show = rdtgroup_seqfile_show, -}; - -static const struct kernfs_ops kf_mondata_ops = { - .atomic_write_len = PAGE_SIZE, - .seq_show = rdtgroup_mondata_show, -}; - -static bool is_cpu_list(struct kernfs_open_file *of) -{ - struct rftype *rft = of->kn->priv; - - return rft->flags & RFTYPE_FLAGS_CPUS_LIST; -} - -static int rdtgroup_cpus_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - struct cpumask *mask; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - - if (rdtgrp) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - if (!rdtgrp->plr->d) { - rdt_last_cmd_clear(); - rdt_last_cmd_puts("Cache domain offline\n"); - ret = -ENODEV; - } else { - mask = &rdtgrp->plr->d->cpu_mask; - seq_printf(s, is_cpu_list(of) ? - "%*pbl\n" : "%*pb\n", - cpumask_pr_args(mask)); - } - } else { - seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", - cpumask_pr_args(&rdtgrp->cpu_mask)); - } - } else { - ret = -ENOENT; - } - rdtgroup_kn_unlock(of->kn); +DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); - return ret; -} +DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); /* - * This is safe against resctrl_sched_in() called from __switch_to() + * This is safe against resctrl_arch_sched_in() called from __switch_to() * because __switch_to() is executed with interrupts disabled. A local call * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. */ -static void update_cpu_closid_rmid(void *info) +void resctrl_arch_sync_cpu_closid_rmid(void *info) { - struct rdtgroup *r = info; + struct resctrl_cpu_defaults *r = info; if (r) { this_cpu_write(pqr_state.default_closid, r->closid); - this_cpu_write(pqr_state.default_rmid, r->mon.rmid); + this_cpu_write(pqr_state.default_rmid, r->rmid); } /* @@ -311,1573 +58,88 @@ static void update_cpu_closid_rmid(void *info) * executing task might have its own closid selected. Just reuse * the context switch code. */ - resctrl_sched_in(); -} - -/* - * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, - * - * Per task closids/rmids must have been set up before calling this function. 
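Illustration only, not part of this patch: the per-CPU handler above only
writes the per-CPU defaults when a group is supplied, so passing NULL
re-evaluates the running task's state without touching the defaults:

	/* Resync PQR_ASSOC everywhere, leaving per-CPU defaults untouched */
	update_closid_rmid(cpu_online_mask, NULL);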
- */ -static void -update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r) -{ - int cpu = get_cpu(); - - if (cpumask_test_cpu(cpu, cpu_mask)) - update_cpu_closid_rmid(r); - smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1); - put_cpu(); + resctrl_arch_sched_in(current); } -static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, - cpumask_var_t tmpmask) -{ - struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp; - struct list_head *head; - - /* Check whether cpus belong to parent ctrl group */ - cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { - rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); - return -EINVAL; - } - - /* Check whether cpus are dropped from this group */ - cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (cpumask_weight(tmpmask)) { - /* Give any dropped cpus to parent rdtgroup */ - cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); - update_closid_rmid(tmpmask, prgrp); - } - - /* - * If we added cpus, remove them from previous group that owned them - * and update per-cpu rmid - */ - cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - if (crgrp == rdtgrp) - continue; - cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask, - tmpmask); - } - update_closid_rmid(tmpmask, rdtgrp); - } - - /* Done pushing/pulling - update this group with new mask */ - cpumask_copy(&rdtgrp->cpu_mask, newmask); - - return 0; -} - -static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m) -{ - struct rdtgroup *crgrp; - - cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m); - /* update the child mon group masks as well*/ - list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list) - cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask); -} - -static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, - cpumask_var_t tmpmask, cpumask_var_t tmpmask1) -{ - struct rdtgroup *r, *crgrp; - struct list_head *head; - - /* Check whether cpus are dropped from this group */ - cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (cpumask_weight(tmpmask)) { - /* Can't drop from default group */ - if (rdtgrp == &rdtgroup_default) { - rdt_last_cmd_puts("Can't drop CPUs from default group\n"); - return -EINVAL; - } - - /* Give any dropped cpus to rdtgroup_default */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, tmpmask); - update_closid_rmid(tmpmask, &rdtgroup_default); - } - - /* - * If we added cpus, remove them from previous group and - * the prev group's child groups that owned them - * and update per-cpu closid/rmid. - */ - cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { - list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { - if (r == rdtgrp) - continue; - cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); - if (cpumask_weight(tmpmask1)) - cpumask_rdtgrp_clear(r, tmpmask1); - } - update_closid_rmid(tmpmask, rdtgrp); - } - - /* Done pushing/pulling - update this group with new mask */ - cpumask_copy(&rdtgrp->cpu_mask, newmask); - - /* - * Clear child mon group masks since there is a new parent mask - * now and update the rmid for the cpus the child lost. 
- */ - head = &rdtgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask); - update_closid_rmid(tmpmask, rdtgrp); - cpumask_clear(&crgrp->cpu_mask); - } - - return 0; -} - -static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - cpumask_var_t tmpmask, newmask, tmpmask1; - struct rdtgroup *rdtgrp; - int ret; - - if (!buf) - return -EINVAL; - - if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) - return -ENOMEM; - if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { - free_cpumask_var(tmpmask); - return -ENOMEM; - } - if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) { - free_cpumask_var(tmpmask); - free_cpumask_var(newmask); - return -ENOMEM; - } - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - ret = -ENOENT; - goto unlock; - } - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto unlock; - } - - if (is_cpu_list(of)) - ret = cpulist_parse(buf, newmask); - else - ret = cpumask_parse(buf, newmask); - - if (ret) { - rdt_last_cmd_puts("Bad CPU list/mask\n"); - goto unlock; - } - - /* check that user didn't specify any offline cpus */ - cpumask_andnot(tmpmask, newmask, cpu_online_mask); - if (cpumask_weight(tmpmask)) { - ret = -EINVAL; - rdt_last_cmd_puts("Can only assign online CPUs\n"); - goto unlock; - } - - if (rdtgrp->type == RDTCTRL_GROUP) - ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1); - else if (rdtgrp->type == RDTMON_GROUP) - ret = cpus_mon_write(rdtgrp, newmask, tmpmask); - else - ret = -EINVAL; - -unlock: - rdtgroup_kn_unlock(of->kn); - free_cpumask_var(tmpmask); - free_cpumask_var(newmask); - free_cpumask_var(tmpmask1); - - return ret ?: nbytes; -} +#define INVALID_CONFIG_INDEX UINT_MAX /** - * rdtgroup_remove - the helper to remove resource group safely - * @rdtgrp: resource group to remove - * - * On resource group creation via a mkdir, an extra kernfs_node reference is - * taken to ensure that the rdtgroup structure remains accessible for the - * rdtgroup_kn_unlock() calls where it is removed. - * - * Drop the extra reference here, then free the rdtgroup structure. + * mon_event_config_index_get - get the hardware index for the + * configurable event + * @evtid: event id. * - * Return: void + * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID + * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID + * INVALID_CONFIG_INDEX for invalid evtid */ -static void rdtgroup_remove(struct rdtgroup *rdtgrp) +static inline unsigned int mon_event_config_index_get(u32 evtid) { - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); -} - -static void _update_task_closid_rmid(void *task) -{ - /* - * If the task is still current on this CPU, update PQR_ASSOC MSR. - * Otherwise, the MSR is updated when the task is scheduled in. - */ - if (task == current) - resctrl_sched_in(); -} - -static void update_task_closid_rmid(struct task_struct *t) -{ - if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) - smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); - else - _update_task_closid_rmid(t); -} - -static int __rdtgroup_move_task(struct task_struct *tsk, - struct rdtgroup *rdtgrp) -{ - /* If the task is already in rdtgrp, no need to move the task. 
*/ - if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && - tsk->rmid == rdtgrp->mon.rmid) || - (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && - tsk->closid == rdtgrp->mon.parent->closid)) + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: return 0; - - /* - * Set the task's closid/rmid before the PQR_ASSOC MSR can be - * updated by them. - * - * For ctrl_mon groups, move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - - if (rdtgrp->type == RDTCTRL_GROUP) { - WRITE_ONCE(tsk->closid, rdtgrp->closid); - WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - return -EINVAL; - } - } - - /* - * Ensure the task's closid and rmid are written before determining if - * the task is current that will decide if it will be interrupted. - */ - barrier(); - - /* - * By now, the task's closid and rmid are set. If the task is current - * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource - * group go into effect. If the task is not current, the MSR will be - * updated when the task is scheduled in. - */ - update_task_closid_rmid(tsk); - - return 0; -} - -static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_alloc_capable && - (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); -} - -static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_mon_capable && - (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); -} - -/** - * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group - * @r: Resource group - * - * Return: 1 if tasks have been assigned to @r, 0 otherwise - */ -int rdtgroup_tasks_assigned(struct rdtgroup *r) -{ - struct task_struct *p, *t; - int ret = 0; - - lockdep_assert_held(&rdtgroup_mutex); - - rcu_read_lock(); - for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) { - ret = 1; - break; - } - } - rcu_read_unlock(); - - return ret; -} - -static int rdtgroup_task_write_permission(struct task_struct *task, - struct kernfs_open_file *of) -{ - const struct cred *tcred = get_task_cred(task); - const struct cred *cred = current_cred(); - int ret = 0; - - /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. 
- */ - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) { - rdt_last_cmd_printf("No permission to move task %d\n", task->pid); - ret = -EPERM; - } - - put_cred(tcred); - return ret; -} - -static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, - struct kernfs_open_file *of) -{ - struct task_struct *tsk; - int ret; - - rcu_read_lock(); - if (pid) { - tsk = find_task_by_vpid(pid); - if (!tsk) { - rcu_read_unlock(); - rdt_last_cmd_printf("No task %d\n", pid); - return -ESRCH; - } - } else { - tsk = current; - } - - get_task_struct(tsk); - rcu_read_unlock(); - - ret = rdtgroup_task_write_permission(tsk, of); - if (!ret) - ret = __rdtgroup_move_task(tsk, rdtgrp); - - put_task_struct(tsk); - return ret; -} - -static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct rdtgroup *rdtgrp; - int ret = 0; - pid_t pid; - - if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) - return -EINVAL; - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - rdt_last_cmd_clear(); - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto unlock; - } - - ret = rdtgroup_move_task(pid, rdtgrp, of); - -unlock: - rdtgroup_kn_unlock(of->kn); - - return ret ?: nbytes; -} - -static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) -{ - struct task_struct *p, *t; - - rcu_read_lock(); - for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) - seq_printf(s, "%d\n", t->pid); - } - rcu_read_unlock(); -} - -static int rdtgroup_tasks_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - int ret = 0; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (rdtgrp) - show_rdt_tasks(rdtgrp, s); - else - ret = -ENOENT; - rdtgroup_kn_unlock(of->kn); - - return ret; -} - -#ifdef CONFIG_PROC_CPU_RESCTRL - -/* - * A task can only be part of one resctrl control group and of one monitor - * group which is associated to that control group. - * - * 1) res: - * mon: - * - * resctrl is not available. - * - * 2) res:/ - * mon: - * - * Task is part of the root resctrl control group, and it is not associated - * to any monitor group. - * - * 3) res:/ - * mon:mon0 - * - * Task is part of the root resctrl control group and monitor group mon0. - * - * 4) res:group0 - * mon: - * - * Task is part of resctrl control group group0, and it is not associated - * to any monitor group. - * - * 5) res:group0 - * mon:mon1 - * - * Task is part of resctrl control group group0 and monitor group mon1. - */ -int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, - struct pid *pid, struct task_struct *tsk) -{ - struct rdtgroup *rdtg; - int ret = 0; - - mutex_lock(&rdtgroup_mutex); - - /* Return empty if resctrl has not been mounted. */ - if (!static_branch_unlikely(&rdt_enable_key)) { - seq_puts(s, "res:\nmon:\n"); - goto unlock; - } - - list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { - struct rdtgroup *crg; - - /* - * Task information is only relevant for shareable - * and exclusive groups. - */ - if (rdtg->mode != RDT_MODE_SHAREABLE && - rdtg->mode != RDT_MODE_EXCLUSIVE) - continue; - - if (rdtg->closid != tsk->closid) - continue; - - seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? 
"/" : "", - rdtg->kn->name); - seq_puts(s, "mon:"); - list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, - mon.crdtgrp_list) { - if (tsk->rmid != crg->mon.rmid) - continue; - seq_printf(s, "%s", crg->kn->name); - break; - } - seq_putc(s, '\n'); - goto unlock; - } - /* - * The above search should succeed. Otherwise return - * with an error. - */ - ret = -ENOENT; -unlock: - mutex_unlock(&rdtgroup_mutex); - - return ret; -} -#endif - -static int rdt_last_cmd_status_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - int len; - - mutex_lock(&rdtgroup_mutex); - len = seq_buf_used(&last_cmd_status); - if (len) - seq_printf(seq, "%.*s", len, last_cmd_status_buf); - else - seq_puts(seq, "ok\n"); - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static int rdt_num_closids_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%d\n", r->num_closid); - return 0; -} - -static int rdt_default_ctrl_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%x\n", r->default_ctrl); - return 0; -} - -static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%u\n", r->cache.min_cbm_bits); - return 0; -} - -static int rdt_shareable_bits_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%x\n", r->cache.shareable_bits); - return 0; -} - -/** - * rdt_bit_usage_show - Display current usage of resources - * - * A domain is a shared resource that can now be allocated differently. Here - * we display the current regions of the domain as an annotated bitmask. - * For each domain of this resource its allocation bitmask - * is annotated as below to indicate the current usage of the corresponding bit: - * 0 - currently unused - * X - currently available for sharing and used by software and hardware - * H - currently used by hardware only but available for software use - * S - currently used and shareable by software only - * E - currently used exclusively by one resource group - * P - currently pseudo-locked by one resource group - */ -static int rdt_bit_usage_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - /* - * Use unsigned long even though only 32 bits are used to ensure - * test_bit() is used safely. - */ - unsigned long sw_shareable = 0, hw_shareable = 0; - unsigned long exclusive = 0, pseudo_locked = 0; - struct rdt_domain *dom; - int i, hwb, swb, excl, psl; - enum rdtgrp_mode mode; - bool sep = false; - u32 *ctrl; - - mutex_lock(&rdtgroup_mutex); - hw_shareable = r->cache.shareable_bits; - list_for_each_entry(dom, &r->domains, list) { - if (sep) - seq_putc(seq, ';'); - ctrl = dom->ctrl_val; - sw_shareable = 0; - exclusive = 0; - seq_printf(seq, "%d=", dom->id); - for (i = 0; i < closids_supported(); i++, ctrl++) { - if (!closid_allocated(i)) - continue; - mode = rdtgroup_mode_by_closid(i); - switch (mode) { - case RDT_MODE_SHAREABLE: - sw_shareable |= *ctrl; - break; - case RDT_MODE_EXCLUSIVE: - exclusive |= *ctrl; - break; - case RDT_MODE_PSEUDO_LOCKSETUP: - /* - * RDT_MODE_PSEUDO_LOCKSETUP is possible - * here but not included since the CBM - * associated with this CLOSID in this mode - * is not initialized and no task or cpu can be - * assigned this CLOSID. 
- */ - break; - case RDT_MODE_PSEUDO_LOCKED: - case RDT_NUM_MODES: - WARN(1, - "invalid mode for closid %d\n", i); - break; - } - } - for (i = r->cache.cbm_len - 1; i >= 0; i--) { - pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, &hw_shareable); - swb = test_bit(i, &sw_shareable); - excl = test_bit(i, &exclusive); - psl = test_bit(i, &pseudo_locked); - if (hwb && swb) - seq_putc(seq, 'X'); - else if (hwb && !swb) - seq_putc(seq, 'H'); - else if (!hwb && swb) - seq_putc(seq, 'S'); - else if (excl) - seq_putc(seq, 'E'); - else if (psl) - seq_putc(seq, 'P'); - else /* Unused bits remain */ - seq_putc(seq, '0'); - } - sep = true; - } - seq_putc(seq, '\n'); - mutex_unlock(&rdtgroup_mutex); - return 0; -} - -static int rdt_min_bw_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%u\n", r->membw.min_bw); - return 0; -} - -static int rdt_num_rmids_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%d\n", r->num_rmid); - - return 0; -} - -static int rdt_mon_features_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - struct mon_evt *mevt; - - list_for_each_entry(mevt, &r->evt_list, list) - seq_printf(seq, "%s\n", mevt->name); - - return 0; -} - -static int rdt_bw_gran_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%u\n", r->membw.bw_gran); - return 0; -} - -static int rdt_delay_linear_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%u\n", r->membw.delay_linear); - return 0; -} - -static int max_threshold_occ_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); - - return 0; -} - -static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, - struct seq_file *seq, void *v) -{ - struct rdt_resource *r = of->kn->parent->priv; - - if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) - seq_puts(seq, "per-thread\n"); - else - seq_puts(seq, "max\n"); - - return 0; -} - -static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct rdt_resource *r = of->kn->parent->priv; - unsigned int bytes; - int ret; - - ret = kstrtouint(buf, 0, &bytes); - if (ret) - return ret; - - if (bytes > (boot_cpu_data.x86_cache_size * 1024)) - return -EINVAL; - - resctrl_cqm_threshold = bytes / r->mon_scale; - - return nbytes; -} - -/* - * rdtgroup_mode_show - Display mode of this resource group - */ -static int rdtgroup_mode_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) -{ - struct rdtgroup *rdtgrp; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode)); - - rdtgroup_kn_unlock(of->kn); - return 0; -} - -/** - * rdt_cdp_peer_get - Retrieve CDP peer if it exists - * @r: RDT resource to which RDT domain @d belongs - * @d: Cache instance for which a CDP peer is requested - * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) - * Used to return the result. 
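As an illustration of the bit_usage legend above, here is a minimal user-space sketch (not part of the patch) that reproduces the annotation for one domain from a simplified view of the allocated CLOSIDs. The kernel derives the same masks from d->ctrl_val and the per-CLOSID modes; the struct and values below are hypothetical.

#include <stdio.h>

enum mode { MODE_SHAREABLE, MODE_EXCLUSIVE };

struct grp {
	unsigned long cbm;
	enum mode mode;
};

/* Print one domain's annotation, most significant CBM bit first. */
static void annotate(unsigned int cbm_len, unsigned long hw_shareable,
		     unsigned long pseudo_locked,
		     const struct grp *grps, int ngrps)
{
	unsigned long sw_shareable = 0, exclusive = 0;
	int i;

	for (i = 0; i < ngrps; i++) {
		if (grps[i].mode == MODE_SHAREABLE)
			sw_shareable |= grps[i].cbm;
		else
			exclusive |= grps[i].cbm;
	}

	for (i = cbm_len - 1; i >= 0; i--) {
		unsigned long bit = 1UL << i;

		if ((hw_shareable & bit) && (sw_shareable & bit))
			putchar('X');
		else if (hw_shareable & bit)
			putchar('H');
		else if (sw_shareable & bit)
			putchar('S');
		else if (exclusive & bit)
			putchar('E');
		else if (pseudo_locked & bit)
			putchar('P');
		else
			putchar('0');
	}
	putchar('\n');
}

int main(void)
{
	struct grp grps[] = {
		{ 0x0f0, MODE_SHAREABLE },
		{ 0x00f, MODE_EXCLUSIVE },
	};

	annotate(12, 0x003, 0x000, grps, 2);	/* prints 0000SSSSEEHH */
	return 0;
}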
- * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) - * Used to return the result. - * - * RDT resources are managed independently and by extension the RDT domains - * (RDT resource instances) are managed independently also. The Code and - * Data Prioritization (CDP) RDT resources, while managed independently, - * could refer to the same underlying hardware. For example, - * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. - * - * When provided with an RDT resource @r and an instance of that RDT - * resource @d rdt_cdp_peer_get() will return if there is a peer RDT - * resource and the exact instance that shares the same hardware. - * - * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. - * If a CDP peer was found, @r_cdp will point to the peer RDT resource - * and @d_cdp will point to the peer RDT domain. - */ -static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, - struct rdt_resource **r_cdp, - struct rdt_domain **d_cdp) -{ - struct rdt_resource *_r_cdp = NULL; - struct rdt_domain *_d_cdp = NULL; - int ret = 0; - - switch (r->rid) { - case RDT_RESOURCE_L3DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - break; - case RDT_RESOURCE_L3CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - break; - case RDT_RESOURCE_L2DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; - break; - case RDT_RESOURCE_L2CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; - break; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return 1; default: - ret = -ENOENT; - goto out; - } - - /* - * When a new CPU comes online and CDP is enabled then the new - * RDT domains (if any) associated with both CDP RDT resources - * are added in the same CPU online routine while the - * rdtgroup_mutex is held. It should thus not happen for one - * RDT domain to exist and be associated with its RDT CDP - * resource but there is no RDT domain associated with the - * peer RDT CDP resource. Hence the WARN. - */ - _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { - _r_cdp = NULL; - _d_cdp = NULL; - ret = -EINVAL; - } - -out: - *r_cdp = _r_cdp; - *d_cdp = _d_cdp; - - return ret; -} - -/** - * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other - * @r: Resource to which domain instance @d belongs. - * @d: The domain instance for which @closid is being tested. - * @cbm: Capacity bitmask being tested. - * @closid: Intended closid for @cbm. - * @exclusive: Only check if overlaps with exclusive resource groups - * - * Checks if provided @cbm intended to be used for @closid on domain - * @d overlaps with any other closids or other hardware usage associated - * with this domain. If @exclusive is true then only overlaps with - * resource groups in exclusive mode will be considered. If @exclusive - * is false then overlaps with any resource group or hardware entities - * will be considered. - * - * @cbm is unsigned long, even if only 32 bits are used, to make the - * bitmap functions work correctly. - * - * Return: false if CBM does not overlap, true if it does. 
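The overlap rule described here condenses to a few lines. The following stand-alone sketch (a simplification: it ignores the pseudo-locksetup special case and models each CLOSID as a flat table, which the kernel keeps in d->ctrl_val plus per-CLOSID modes) shows why a plain bitmap intersection is all that is needed:

#include <stdbool.h>
#include <stdio.h>

struct closid_cfg {
	bool allocated;
	bool exclusive;
	unsigned long cbm;
};

/*
 * Intersection with hardware-owned bits only matters for the
 * non-exclusive test; intersection with another CLOSID always
 * matters, except that the exclusive-only test skips CLOSIDs
 * that are not themselves exclusive.
 */
static bool cbm_overlaps(unsigned long cbm, int closid, bool exclusive,
			 unsigned long hw_shareable,
			 const struct closid_cfg *cfg, int num_closids)
{
	int i;

	if (!exclusive && (cbm & hw_shareable))
		return true;

	for (i = 0; i < num_closids; i++) {
		if (!cfg[i].allocated || i == closid)
			continue;
		if (cbm & cfg[i].cbm) {
			if (exclusive && !cfg[i].exclusive)
				continue;
			return true;
		}
	}
	return false;
}

int main(void)
{
	struct closid_cfg cfg[4] = {
		[0] = { true, false, 0xff0 },	/* default group, shareable */
		[1] = { true, true,  0x00f },	/* an exclusive group */
	};

	printf("%d\n", cbm_overlaps(0x0f0, 2, false, 0x3, cfg, 4)); /* 1: hits CLOSID 0 */
	printf("%d\n", cbm_overlaps(0x0f0, 2, true,  0x3, cfg, 4)); /* 0: CLOSID 0 not exclusive */
	return 0;
}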
- */ -static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) -{ - enum rdtgrp_mode mode; - unsigned long ctrl_b; - u32 *ctrl; - int i; - - /* Check for any overlap with regions used by hardware directly */ - if (!exclusive) { - ctrl_b = r->cache.shareable_bits; - if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) - return true; - } - - /* Check for overlap with other resource groups */ - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = *ctrl; - mode = rdtgroup_mode_by_closid(i); - if (closid_allocated(i) && i != closid && - mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { - if (exclusive) { - if (mode == RDT_MODE_EXCLUSIVE) - return true; - continue; - } - return true; - } - } - } - - return false; -} - -/** - * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware - * @r: Resource to which domain instance @d belongs. - * @d: The domain instance for which @closid is being tested. - * @cbm: Capacity bitmask being tested. - * @closid: Intended closid for @cbm. - * @exclusive: Only check if overlaps with exclusive resource groups - * - * Resources that can be allocated using a CBM can use the CBM to control - * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test - * for overlap. Overlap test is not limited to the specific resource for - * which the CBM is intended though - when dealing with CDP resources that - * share the underlying hardware the overlap check should be performed on - * the CDP resource sharing the hardware also. - * - * Refer to description of __rdtgroup_cbm_overlaps() for the details of the - * overlap test. - * - * Return: true if CBM overlap detected, false if there is no overlap - */ -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) -{ - struct rdt_resource *r_cdp; - struct rdt_domain *d_cdp; - - if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) - return true; - - if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) - return false; - - return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); -} - -/** - * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive - * - * An exclusive resource group implies that there should be no sharing of - * its allocated resources. At the time this group is considered to be - * exclusive this test can determine if its current schemata supports this - * setting by testing for overlap with all other resource groups. - * - * Return: true if resource group can be exclusive, false if there is overlap - * with allocations of other resource groups and thus this resource group - * cannot be exclusive. 
- */ -static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) -{ - int closid = rdtgrp->closid; - struct rdt_resource *r; - bool has_cache = false; - struct rdt_domain *d; - - for_each_alloc_enabled_rdt_resource(r) { - if (r->rid == RDT_RESOURCE_MBA) - continue; - has_cache = true; - list_for_each_entry(d, &r->domains, list) { - if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) { - rdt_last_cmd_puts("Schemata overlaps\n"); - return false; - } - } - } - - if (!has_cache) { - rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); - return false; - } - - return true; -} - -/** - * rdtgroup_mode_write - Modify the resource group's mode - * - */ -static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct rdtgroup *rdtgrp; - enum rdtgrp_mode mode; - int ret = 0; - - /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') - return -EINVAL; - buf[nbytes - 1] = '\0'; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - rdt_last_cmd_clear(); - - mode = rdtgrp->mode; - - if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) || - (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) || - (!strcmp(buf, "pseudo-locksetup") && - mode == RDT_MODE_PSEUDO_LOCKSETUP) || - (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED)) - goto out; - - if (mode == RDT_MODE_PSEUDO_LOCKED) { - rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); - ret = -EINVAL; - goto out; - } - - if (!strcmp(buf, "shareable")) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = rdtgroup_locksetup_exit(rdtgrp); - if (ret) - goto out; - } - rdtgrp->mode = RDT_MODE_SHAREABLE; - } else if (!strcmp(buf, "exclusive")) { - if (!rdtgroup_mode_test_exclusive(rdtgrp)) { - ret = -EINVAL; - goto out; - } - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - ret = rdtgroup_locksetup_exit(rdtgrp); - if (ret) - goto out; - } - rdtgrp->mode = RDT_MODE_EXCLUSIVE; - } else if (!strcmp(buf, "pseudo-locksetup")) { - ret = rdtgroup_locksetup_enter(rdtgrp); - if (ret) - goto out; - rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; - } else { - rdt_last_cmd_puts("Unknown or unsupported mode\n"); - ret = -EINVAL; - } - -out: - rdtgroup_kn_unlock(of->kn); - return ret ?: nbytes; -} - -/** - * rdtgroup_cbm_to_size - Translate CBM to size in bytes - * @r: RDT resource to which @d belongs. - * @d: RDT domain instance. - * @cbm: bitmask for which the size should be computed. - * - * The bitmask provided associated with the RDT domain instance @d will be - * translated into how many bytes it represents. The size in bytes is - * computed by first dividing the total cache size by the CBM length to - * determine how many bytes each bit in the bitmask represents. The result - * is multiplied with the number of bits set in the bitmask. - * - * @cbm is unsigned long, even if only 32 bits are used to make the - * bitmap functions work correctly. 
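The computation described above reduces to bytes-per-bit times the population count of the CBM. A worked example, assuming a hypothetical 24 MiB cache with a 12-bit CBM:

#include <stdio.h>

/* bytes per CBM bit, multiplied by the number of set bits */
static unsigned int cbm_to_size(unsigned int cache_size,
				unsigned int cbm_len, unsigned long cbm)
{
	return cache_size / cbm_len * __builtin_popcountl(cbm);
}

int main(void)
{
	/* 24 MiB L3, 12-bit CBM: each bit covers 2 MiB, 0x3f spans 12 MiB */
	printf("%u\n", cbm_to_size(24 * 1024 * 1024, 12, 0x3f));
	return 0;
}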
- */ -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, unsigned long cbm) -{ - struct cpu_cacheinfo *ci; - unsigned int size = 0; - int num_b, i; - - num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == r->cache_level) { - size = ci->info_list[i].size / r->cache.cbm_len * num_b; - break; - } + /* Should never reach here */ + return INVALID_CONFIG_INDEX; } - - return size; } -/** - * rdtgroup_size_show - Display size in bytes of allocated regions - * - * The "size" file mirrors the layout of the "schemata" file, printing the - * size in bytes of each region instead of the capacity bitmask. - * - */ -static int rdtgroup_size_show(struct kernfs_open_file *of, - struct seq_file *s, void *v) +void resctrl_arch_mon_event_config_read(void *_config_info) { - struct rdtgroup *rdtgrp; - struct rdt_resource *r; - struct rdt_domain *d; - unsigned int size; - int ret = 0; - bool sep; - u32 ctrl; - - rdtgrp = rdtgroup_kn_lock_live(of->kn); - if (!rdtgrp) { - rdtgroup_kn_unlock(of->kn); - return -ENOENT; - } - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - if (!rdtgrp->plr->d) { - rdt_last_cmd_clear(); - rdt_last_cmd_puts("Cache domain offline\n"); - ret = -ENODEV; - } else { - seq_printf(s, "%*s:", max_name_width, - rdtgrp->plr->r->name); - size = rdtgroup_cbm_to_size(rdtgrp->plr->r, - rdtgrp->plr->d, - rdtgrp->plr->cbm); - seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); - } - goto out; - } - - for_each_alloc_enabled_rdt_resource(r) { - sep = false; - seq_printf(s, "%*s:", max_name_width, r->name); - list_for_each_entry(d, &r->domains, list) { - if (sep) - seq_putc(s, ';'); - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - size = 0; - } else { - ctrl = (!is_mba_sc(r) ? - d->ctrl_val[rdtgrp->closid] : - d->mbps_val[rdtgrp->closid]); - if (r->rid == RDT_RESOURCE_MBA) - size = ctrl; - else - size = rdtgroup_cbm_to_size(r, d, ctrl); - } - seq_printf(s, "%d=%u", d->id, size); - sep = true; - } - seq_putc(s, '\n'); - } - -out: - rdtgroup_kn_unlock(of->kn); + struct resctrl_mon_config_info *config_info = _config_info; + unsigned int index; + u64 msrval; - return ret; -} - -/* rdtgroup information files for one cache resource. 
*/ -static struct rftype res_common_files[] = { - { - .name = "last_cmd_status", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_last_cmd_status_show, - .fflags = RF_TOP_INFO, - }, - { - .name = "num_closids", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_num_closids_show, - .fflags = RF_CTRL_INFO, - }, - { - .name = "mon_features", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_mon_features_show, - .fflags = RF_MON_INFO, - }, - { - .name = "num_rmids", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_num_rmids_show, - .fflags = RF_MON_INFO, - }, - { - .name = "cbm_mask", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_default_ctrl_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "min_cbm_bits", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_min_cbm_bits_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "shareable_bits", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_shareable_bits_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "bit_usage", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_bit_usage_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "min_bandwidth", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_min_bw_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, - }, - { - .name = "bandwidth_gran", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_bw_gran_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, - }, - { - .name = "delay_linear", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_delay_linear_show, - .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, - }, - /* - * Platform specific which (if any) capabilities are provided by - * thread_throttle_mode. Defer "fflags" initialization to platform - * discovery. 
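The files declared in res_common_files above surface under /sys/fs/resctrl/info once the filesystem is mounted. A small user-space reader as a sketch; the L3 paths are only present on systems where that resource is alloc-capable:

#include <stdio.h>

int main(void)
{
	const char *files[] = {
		"/sys/fs/resctrl/info/L3/num_closids",
		"/sys/fs/resctrl/info/L3/cbm_mask",
		"/sys/fs/resctrl/info/L3/shareable_bits",
		"/sys/fs/resctrl/info/L3/bit_usage",
	};
	char buf[64];
	size_t i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f = fopen(files[i], "r");

		if (!f)
			continue;	/* resource absent or fs not mounted */
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", files[i], buf);
		fclose(f);
	}
	return 0;
}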
- */ - { - .name = "thread_throttle_mode", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdt_thread_throttle_mode_show, - }, - { - .name = "max_threshold_occupancy", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = max_threshold_occ_write, - .seq_show = max_threshold_occ_show, - .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, - }, - { - .name = "cpus", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_cpus_write, - .seq_show = rdtgroup_cpus_show, - .fflags = RFTYPE_BASE, - }, - { - .name = "cpus_list", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_cpus_write, - .seq_show = rdtgroup_cpus_show, - .flags = RFTYPE_FLAGS_CPUS_LIST, - .fflags = RFTYPE_BASE, - }, - { - .name = "tasks", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_tasks_write, - .seq_show = rdtgroup_tasks_show, - .fflags = RFTYPE_BASE, - }, - { - .name = "schemata", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_schemata_write, - .seq_show = rdtgroup_schemata_show, - .fflags = RF_CTRL_BASE, - }, - { - .name = "mode", - .mode = 0644, - .kf_ops = &rdtgroup_kf_single_ops, - .write = rdtgroup_mode_write, - .seq_show = rdtgroup_mode_show, - .fflags = RF_CTRL_BASE, - }, - { - .name = "size", - .mode = 0444, - .kf_ops = &rdtgroup_kf_single_ops, - .seq_show = rdtgroup_size_show, - .fflags = RF_CTRL_BASE, - }, - -}; - -static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) -{ - struct rftype *rfts, *rft; - int ret, len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - lockdep_assert_held(&rdtgroup_mutex); - - for (rft = rfts; rft < rfts + len; rft++) { - if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { - ret = rdtgroup_add_file(kn, rft); - if (ret) - goto error; - } - } - - return 0; -error: - pr_warn("Failed to add %s, err=%d\n", rft->name, ret); - while (--rft >= rfts) { - if ((fflags & rft->fflags) == rft->fflags) - kernfs_remove_by_name(kn, rft->name); - } - return ret; -} - -static struct rftype *rdtgroup_get_rftype_by_name(const char *name) -{ - struct rftype *rfts, *rft; - int len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - for (rft = rfts; rft < rfts + len; rft++) { - if (!strcmp(rft->name, name)) - return rft; - } - - return NULL; -} - -void __init thread_throttle_mode_init(void) -{ - struct rftype *rft; - - rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); - if (!rft) + index = mon_event_config_index_get(config_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", config_info->evtid); return; - - rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB; -} - -/** - * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file - * @r: The resource group with which the file is associated. - * @name: Name of the file - * - * The permissions of named resctrl file, directory, or link are modified - * to not allow read, write, or execute by any user. - * - * WARNING: This function is intended to communicate to the user that the - * resctrl file has been locked down - that it is not relevant to the - * particular state the system finds itself in. It should not be relied - * on to protect from user access because after the file's permissions - * are restricted the user can still change the permissions using chmod - * from the command line. - * - * Return: 0 on success, <0 on failure. 
- */ -int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name) -{ - struct iattr iattr = {.ia_valid = ATTR_MODE,}; - struct kernfs_node *kn; - int ret = 0; - - kn = kernfs_find_and_get_ns(r->kn, name, NULL); - if (!kn) - return -ENOENT; - - switch (kernfs_type(kn)) { - case KERNFS_DIR: - iattr.ia_mode = S_IFDIR; - break; - case KERNFS_FILE: - iattr.ia_mode = S_IFREG; - break; - case KERNFS_LINK: - iattr.ia_mode = S_IFLNK; - break; - } - - ret = kernfs_setattr(kn, &iattr); - kernfs_put(kn); - return ret; -} - -/** - * rdtgroup_kn_mode_restore - Restore user access to named resctrl file - * @r: The resource group with which the file is associated. - * @name: Name of the file - * @mask: Mask of permissions that should be restored - * - * Restore the permissions of the named file. If @name is a directory the - * permissions of its parent will be used. - * - * Return: 0 on success, <0 on failure. - */ -int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, - umode_t mask) -{ - struct iattr iattr = {.ia_valid = ATTR_MODE,}; - struct kernfs_node *kn, *parent; - struct rftype *rfts, *rft; - int ret, len; - - rfts = res_common_files; - len = ARRAY_SIZE(res_common_files); - - for (rft = rfts; rft < rfts + len; rft++) { - if (!strcmp(rft->name, name)) - iattr.ia_mode = rft->mode & mask; - } - - kn = kernfs_find_and_get_ns(r->kn, name, NULL); - if (!kn) - return -ENOENT; - - switch (kernfs_type(kn)) { - case KERNFS_DIR: - parent = kernfs_get_parent(kn); - if (parent) { - iattr.ia_mode |= parent->mode; - kernfs_put(parent); - } - iattr.ia_mode |= S_IFDIR; - break; - case KERNFS_FILE: - iattr.ia_mode |= S_IFREG; - break; - case KERNFS_LINK: - iattr.ia_mode |= S_IFLNK; - break; } + rdmsrq(MSR_IA32_EVT_CFG_BASE + index, msrval); - ret = kernfs_setattr(kn, &iattr); - kernfs_put(kn); - return ret; -} - -static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, - unsigned long fflags) -{ - struct kernfs_node *kn_subdir; - int ret; - - kn_subdir = kernfs_create_dir(kn_info, name, - kn_info->mode, r); - if (IS_ERR(kn_subdir)) - return PTR_ERR(kn_subdir); - - ret = rdtgroup_kn_set_ugid(kn_subdir); - if (ret) - return ret; - - ret = rdtgroup_add_files(kn_subdir, fflags); - if (!ret) - kernfs_activate(kn_subdir); - - return ret; + /* Report only the valid event configuration bits */ + config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS; } -static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) +void resctrl_arch_mon_event_config_write(void *_config_info) { - struct rdt_resource *r; - unsigned long fflags; - char name[32]; - int ret; + struct resctrl_mon_config_info *config_info = _config_info; + unsigned int index; - /* create the directory */ - kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); - if (IS_ERR(kn_info)) - return PTR_ERR(kn_info); - - ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); - if (ret) - goto out_destroy; - - for_each_alloc_enabled_rdt_resource(r) { - fflags = r->fflags | RF_CTRL_INFO; - ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); - if (ret) - goto out_destroy; - } - - for_each_mon_enabled_rdt_resource(r) { - fflags = r->fflags | RF_MON_INFO; - sprintf(name, "%s_MON", r->name); - ret = rdtgroup_mkdir_info_resdir(r, name, fflags); - if (ret) - goto out_destroy; + index = mon_event_config_index_get(config_info->evtid); + if (index == INVALID_CONFIG_INDEX) { + pr_warn_once("Invalid event id %d\n", config_info->evtid); + return; } - - ret = rdtgroup_kn_set_ugid(kn_info); - if (ret) - goto out_destroy; - - 
kernfs_activate(kn_info); - - return 0; - -out_destroy: - kernfs_remove(kn_info); - return ret; -} - -static int -mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, - char *name, struct kernfs_node **dest_kn) -{ - struct kernfs_node *kn; - int ret; - - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - if (dest_kn) - *dest_kn = kn; - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - kernfs_activate(kn); - - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; + wrmsrq(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config); } static void l3_qos_cfg_update(void *arg) { bool *enable = arg; - wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); + wrmsrq(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } static void l2_qos_cfg_update(void *arg) { bool *enable = arg; - wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); -} - -static inline bool is_mba_linear(void) -{ - return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; + wrmsrq(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); } static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); + struct rdt_ctrl_domain *d; struct rdt_resource *r_l; cpumask_var_t cpu_mask; - struct rdt_domain *d; int cpu; + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + if (level == RDT_RESOURCE_L3) update = l3_qos_cfg_update; else if (level == RDT_RESOURCE_L2) @@ -1888,23 +150,19 @@ static int set_cache_qos_cfg(int level, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - r_l = &rdt_resources_all[level]; - list_for_each_entry(d, &r_l->domains, list) { + r_l = &rdt_resources_all[level].r_resctrl; + list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ - for_each_cpu(cpu, &d->cpu_mask) + for_each_cpu(cpu, &d->hdr.cpu_mask) cpumask_set_cpu(cpu, cpu_mask); else /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); } - cpu = get_cpu(); - /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ - if (cpumask_test_cpu(cpu, cpu_mask)) - update(&enable); - /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, update, &enable, 1); - put_cpu(); + + /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ + on_each_cpu_mask(cpu_mask, update, &enable, 1); free_cpumask_var(cpu_mask); @@ -1914,1333 +172,91 @@ static int set_cache_qos_cfg(int level, bool enable) /* Restore the qos cfg state when a domain comes online */ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) { - if (!r->alloc_capable) - return; - - if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) - l2_qos_cfg_update(&r->alloc_enabled); - - if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) - l3_qos_cfg_update(&r->alloc_enabled); -} - -/* - * Enable or disable the MBA software controller - * which helps user specify bandwidth in MBps. - * MBA software controller is supported only if - * MBM is supported and MBA is in linear scale. 
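With the software controller enabled via the mba_MBps mount option, the same schemata syntax switches meaning from percentage to MBps. A user-space sketch that requests a bandwidth limit for a hypothetical group "grp1" (domain ids depend on the system); on rejection, info/last_cmd_status, shown earlier, explains why:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 50 means 50% by default, or 50 MBps if mounted with mba_MBps */
	const char *line = "MB:0=50\n";
	FILE *f = fopen("/sys/fs/resctrl/grp1/schemata", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fwrite(line, 1, strlen(line), f) != strlen(line))
		perror("fwrite");
	if (fclose(f) == EOF)
		perror("fclose");	/* check both the write and the close */
	return 0;
}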
- */ -static int set_mba_sc(bool mba_sc) -{ - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; - struct rdt_domain *d; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - if (!is_mbm_enabled() || !is_mba_linear() || - mba_sc == is_mba_sc(r)) - return -EINVAL; + if (!r->cdp_capable) + return; - r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) - setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); + if (r->rid == RDT_RESOURCE_L2) + l2_qos_cfg_update(&hw_res->cdp_enabled); - return 0; + if (r->rid == RDT_RESOURCE_L3) + l3_qos_cfg_update(&hw_res->cdp_enabled); } -static int cdp_enable(int level, int data_type, int code_type) +static int cdp_enable(int level) { - struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; - struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; - struct rdt_resource *r_l = &rdt_resources_all[level]; + struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; int ret; - if (!r_l->alloc_capable || !r_ldata->alloc_capable || - !r_lcode->alloc_capable) + if (!r_l->alloc_capable) return -EINVAL; ret = set_cache_qos_cfg(level, true); - if (!ret) { - r_l->alloc_enabled = false; - r_ldata->alloc_enabled = true; - r_lcode->alloc_enabled = true; - } - return ret; -} - -static int cdpl3_enable(void) -{ - return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE); -} - -static int cdpl2_enable(void) -{ - return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE); -} - -static void cdp_disable(int level, int data_type, int code_type) -{ - struct rdt_resource *r = &rdt_resources_all[level]; - - r->alloc_enabled = r->alloc_capable; - - if (rdt_resources_all[data_type].alloc_enabled) { - rdt_resources_all[data_type].alloc_enabled = false; - rdt_resources_all[code_type].alloc_enabled = false; - set_cache_qos_cfg(level, false); - } -} - -static void cdpl3_disable(void) -{ - cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); -} - -static void cdpl2_disable(void) -{ - cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); -} - -static void cdp_disable_all(void) -{ - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) - cdpl3_disable(); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) - cdpl2_disable(); -} - -/* - * We don't allow rdtgroup directories to be created anywhere - * except the root directory. Thus when looking for the rdtgroup - * structure for a kernfs node we are either looking at a directory, - * in which case the rdtgroup structure is pointed at by the "priv" - * field, otherwise we have a file, and need only look to the parent - * to find the rdtgroup. - */ -static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) -{ - if (kernfs_type(kn) == KERNFS_DIR) { - /* - * All the resource directories use "kn->priv" - * to point to the "struct rdtgroup" for the - * resource. "info" and its subdirectories don't - * have rdtgroup structures, so return NULL here. - */ - if (kn == kn_info || kn->parent == kn_info) - return NULL; - else - return kn->priv; - } else { - return kn->parent->priv; - } -} - -struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) -{ - struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); - - if (!rdtgrp) - return NULL; - - atomic_inc(&rdtgrp->waitcount); - kernfs_break_active_protection(kn); - - mutex_lock(&rdtgroup_mutex); - - /* Was this group deleted while we waited? 
*/ - if (rdtgrp->flags & RDT_DELETED) - return NULL; - - return rdtgrp; -} - -void rdtgroup_kn_unlock(struct kernfs_node *kn) -{ - struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); - - if (!rdtgrp) - return; - - mutex_unlock(&rdtgroup_mutex); - - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - kernfs_unbreak_active_protection(kn); - rdtgroup_remove(rdtgrp); - } else { - kernfs_unbreak_active_protection(kn); - } -} - -static int mkdir_mondata_all(struct kernfs_node *parent_kn, - struct rdtgroup *prgrp, - struct kernfs_node **mon_data_kn); - -static int rdt_enable_ctx(struct rdt_fs_context *ctx) -{ - int ret = 0; - - if (ctx->enable_cdpl2) - ret = cdpl2_enable(); - - if (!ret && ctx->enable_cdpl3) - ret = cdpl3_enable(); - - if (!ret && ctx->enable_mba_mbps) - ret = set_mba_sc(true); + if (!ret) + rdt_resources_all[level].cdp_enabled = true; return ret; } -static int rdt_get_tree(struct fs_context *fc) +static void cdp_disable(int level) { - struct rdt_fs_context *ctx = rdt_fc2context(fc); - struct rdt_domain *dom; - struct rdt_resource *r; - int ret; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - /* - * resctrl file system can only be mounted once. - */ - if (static_branch_unlikely(&rdt_enable_key)) { - ret = -EBUSY; - goto out; - } - - ret = rdt_enable_ctx(ctx); - if (ret < 0) - goto out_cdp; - - closid_init(); - - ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret < 0) - goto out_mba; + struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; - if (rdt_mon_capable) { - ret = mongroup_create_dir(rdtgroup_default.kn, - &rdtgroup_default, "mon_groups", - &kn_mongrp); - if (ret < 0) - goto out_info; - - ret = mkdir_mondata_all(rdtgroup_default.kn, - &rdtgroup_default, &kn_mondata); - if (ret < 0) - goto out_mongrp; - rdtgroup_default.mon.mon_data_kn = kn_mondata; - } - - ret = rdt_pseudo_lock_init(); - if (ret) - goto out_mondata; - - ret = kernfs_get_tree(fc); - if (ret < 0) - goto out_psl; - - if (rdt_alloc_capable) - static_branch_enable_cpuslocked(&rdt_alloc_enable_key); - if (rdt_mon_capable) - static_branch_enable_cpuslocked(&rdt_mon_enable_key); - - if (rdt_alloc_capable || rdt_mon_capable) - static_branch_enable_cpuslocked(&rdt_enable_key); - - if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3]; - list_for_each_entry(dom, &r->domains, list) - mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); + if (r_hw->cdp_enabled) { + set_cache_qos_cfg(level, false); + r_hw->cdp_enabled = false; } - - goto out; - -out_psl: - rdt_pseudo_lock_release(); -out_mondata: - if (rdt_mon_capable) - kernfs_remove(kn_mondata); -out_mongrp: - if (rdt_mon_capable) - kernfs_remove(kn_mongrp); -out_info: - kernfs_remove(kn_info); -out_mba: - if (ctx->enable_mba_mbps) - set_mba_sc(false); -out_cdp: - cdp_disable_all(); -out: - rdt_last_cmd_clear(); - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); - return ret; } -enum rdt_param { - Opt_cdp, - Opt_cdpl2, - Opt_mba_mbps, - nr__rdt_params -}; - -static const struct fs_parameter_spec rdt_fs_parameters[] = { - fsparam_flag("cdp", Opt_cdp), - fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_MBps", Opt_mba_mbps), - {} -}; - -static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) { - struct rdt_fs_context *ctx = rdt_fc2context(fc); - struct fs_parse_result 
result; - int opt; - - opt = fs_parse(fc, rdt_fs_parameters, param, &result); - if (opt < 0) - return opt; + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; - switch (opt) { - case Opt_cdp: - ctx->enable_cdpl3 = true; - return 0; - case Opt_cdpl2: - ctx->enable_cdpl2 = true; - return 0; - case Opt_mba_mbps: - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return -EINVAL; - ctx->enable_mba_mbps = true; - return 0; - } + if (!hw_res->r_resctrl.cdp_capable) + return -EINVAL; - return -EINVAL; -} + if (enable) + return cdp_enable(l); -static void rdt_fs_context_free(struct fs_context *fc) -{ - struct rdt_fs_context *ctx = rdt_fc2context(fc); + cdp_disable(l); - kernfs_free_fs_context(fc); - kfree(ctx); + return 0; } -static const struct fs_context_operations rdt_fs_context_ops = { - .free = rdt_fs_context_free, - .parse_param = rdt_parse_param, - .get_tree = rdt_get_tree, -}; - -static int rdt_init_fs_context(struct fs_context *fc) +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) { - struct rdt_fs_context *ctx; - - ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->kfc.root = rdt_root; - ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; - fc->fs_private = &ctx->kfc; - fc->ops = &rdt_fs_context_ops; - put_user_ns(fc->user_ns); - fc->user_ns = get_user_ns(&init_user_ns); - fc->global = true; - return 0; + return rdt_resources_all[l].cdp_enabled; } -static int reset_all_ctrls(struct rdt_resource *r) +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; - cpumask_var_t cpu_mask; - struct rdt_domain *d; - int i, cpu; + struct rdt_ctrl_domain *d; + int i; - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; + /* Walking r->domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); msr_param.res = r; msr_param.low = 0; - msr_param.high = r->num_closid; + msr_param.high = hw_res->num_closid; /* * Disable resource control for this resource by setting all - * CBMs in all domains to the maximum mask value. Pick one CPU + * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU * from each domain to update the MSRs below. */ - list_for_each_entry(d, &r->domains, list) { - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - - for (i = 0; i < r->num_closid; i++) - d->ctrl_val[i] = r->default_ctrl; - } - cpu = get_cpu(); - /* Update CBM on this cpu if it's in cpu_mask. */ - if (cpumask_test_cpu(cpu, cpu_mask)) - rdt_ctrl_update(&msr_param); - /* Update CBM on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); - put_cpu(); - - free_cpumask_var(cpu_mask); - - return 0; -} - -/* - * Move tasks from one to the other group. If @from is NULL, then all tasks - * in the systems are moved unconditionally (used for teardown). - * - * If @mask is not NULL the cpus on which moved tasks are running are set - * in that mask so the update smp function call is restricted to affected - * cpus. - */ -static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, - struct cpumask *mask) -{ - struct task_struct *p, *t; - - read_lock(&tasklist_lock); - for_each_process_thread(p, t) { - if (!from || is_closid_match(t, from) || - is_rmid_match(t, from)) { - WRITE_ONCE(t->closid, to->closid); - WRITE_ONCE(t->rmid, to->mon.rmid); - - /* - * If the task is on a CPU, set the CPU in the mask. 
- * The detection is inaccurate as tasks might move or - * schedule before the smp function call takes place. - * In such a case the function call is pointless, but - * there is no other side effect. - */ - if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) - cpumask_set_cpu(task_cpu(t), mask); - } - } - read_unlock(&tasklist_lock); -} - -static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) -{ - struct rdtgroup *sentry, *stmp; - struct list_head *head; - - head = &rdtgrp->mon.crdtgrp_list; - list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { - free_rmid(sentry->mon.rmid); - list_del(&sentry->mon.crdtgrp_list); - - if (atomic_read(&sentry->waitcount) != 0) - sentry->flags = RDT_DELETED; - else - rdtgroup_remove(sentry); - } -} - -/* - * Forcibly remove all of subdirectories under root. - */ -static void rmdir_all_sub(void) -{ - struct rdtgroup *rdtgrp, *tmp; - - /* Move all tasks to the default resource group */ - rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); - - list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { - /* Free any child rmids */ - free_all_child_rdtgrp(rdtgrp); - - /* Remove each rdtgroup other than root */ - if (rdtgrp == &rdtgroup_default) - continue; - - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - - /* - * Give any CPUs back to the default group. We cannot copy - * cpu_online_mask because a CPU might have executed the - * offline callback already, but is still marked online. - */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); - - free_rmid(rdtgrp->mon.rmid); - - kernfs_remove(rdtgrp->kn); - list_del(&rdtgrp->rdtgroup_list); - - if (atomic_read(&rdtgrp->waitcount) != 0) - rdtgrp->flags = RDT_DELETED; - else - rdtgroup_remove(rdtgrp); - } - /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ - update_closid_rmid(cpu_online_mask, &rdtgroup_default); - - kernfs_remove(kn_info); - kernfs_remove(kn_mongrp); - kernfs_remove(kn_mondata); -} - -static void rdt_kill_sb(struct super_block *sb) -{ - struct rdt_resource *r; - - cpus_read_lock(); - mutex_lock(&rdtgroup_mutex); - - set_mba_sc(false); - - /*Put everything back to default values. */ - for_each_alloc_enabled_rdt_resource(r) - reset_all_ctrls(r); - cdp_disable_all(); - rmdir_all_sub(); - rdt_pseudo_lock_release(); - rdtgroup_default.mode = RDT_MODE_SHAREABLE; - static_branch_disable_cpuslocked(&rdt_alloc_enable_key); - static_branch_disable_cpuslocked(&rdt_mon_enable_key); - static_branch_disable_cpuslocked(&rdt_enable_key); - kernfs_kill_sb(sb); - mutex_unlock(&rdtgroup_mutex); - cpus_read_unlock(); -} - -static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .init_fs_context = rdt_init_fs_context, - .parameters = rdt_fs_parameters, - .kill_sb = rdt_kill_sb, -}; - -static int mon_addfile(struct kernfs_node *parent_kn, const char *name, - void *priv) -{ - struct kernfs_node *kn; - int ret = 0; - - kn = __kernfs_create_file(parent_kn, name, 0444, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, - &kf_mondata_ops, priv, NULL, NULL); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - kernfs_remove(kn); - return ret; - } - - return ret; -} - -/* - * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups with given domain id. 
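rdt_move_group_tasks() is the kernel-side bulk move; from user space the equivalent for a single task is a write to the destination group's "tasks" file, after which the kernel updates t->closid and t->rmid as above. A sketch using a hypothetical group "grp1":

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/resctrl/grp1/tasks", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%d\n", getpid());	/* move the calling process */
	if (fclose(f) == EOF)
		perror("fclose");
	return 0;
}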
- */ -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) -{ - struct rdtgroup *prgrp, *crgrp; - char name[32]; - - if (!r->mon_enabled) - return; - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - sprintf(name, "mon_%s_%02d", r->name, dom_id); - kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); - - list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) - kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); - } -} - -static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) -{ - union mon_data_bits priv; - struct kernfs_node *kn; - struct mon_evt *mevt; - struct rmid_read rr; - char name[32]; - int ret; - - sprintf(name, "mon_%s_%02d", r->name, d->id); - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - if (WARN_ON(list_empty(&r->evt_list))) { - ret = -EPERM; - goto out_destroy; - } - - priv.u.rid = r->rid; - priv.u.domid = d->id; - list_for_each_entry(mevt, &r->evt_list, list) { - priv.u.evtid = mevt->evtid; - ret = mon_addfile(kn, mevt->name, priv.priv); - if (ret) - goto out_destroy; - - if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); - } - kernfs_activate(kn); - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; -} - -/* - * Add all subdirectories of mon_data for "ctrl_mon" groups - * and "monitor" groups with given domain id. - */ -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) -{ - struct kernfs_node *parent_kn; - struct rdtgroup *prgrp, *crgrp; - struct list_head *head; - - if (!r->mon_enabled) - return; - - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - parent_kn = prgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, prgrp); - - head = &prgrp->mon.crdtgrp_list; - list_for_each_entry(crgrp, head, mon.crdtgrp_list) { - parent_kn = crgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, crgrp); - } - } -} - -static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, - struct rdt_resource *r, - struct rdtgroup *prgrp) -{ - struct rdt_domain *dom; - int ret; - - list_for_each_entry(dom, &r->domains, list) { - ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); - if (ret) - return ret; - } - - return 0; -} - -/* - * This creates a directory mon_data which contains the monitored data. - * - * mon_data has one directory for each domain which are named - * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data - * with L3 domain looks as below: - * ./mon_data: - * mon_L3_00 - * mon_L3_01 - * mon_L3_02 - * ... - * - * Each domain directory has one file per event: - * ./mon_L3_00/: - * llc_occupancy - * - */ -static int mkdir_mondata_all(struct kernfs_node *parent_kn, - struct rdtgroup *prgrp, - struct kernfs_node **dest_kn) -{ - struct rdt_resource *r; - struct kernfs_node *kn; - int ret; - - /* - * Create the mon_data directory first. - */ - ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); - if (ret) - return ret; - - if (dest_kn) - *dest_kn = kn; - - /* - * Create the subdirectories for each domain. 
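Given the mon_data layout described above, user space can poll the counters by walking the domain directories. A sketch that reads llc_occupancy from every L3 domain directory of the default group:

#include <dirent.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *base = "/sys/fs/resctrl/mon_data";
	DIR *dir = opendir(base);
	struct dirent *de;
	char path[512], buf[64];

	if (!dir) {
		perror("opendir");
		return 1;
	}
	while ((de = readdir(dir))) {
		FILE *f;

		if (strncmp(de->d_name, "mon_L3_", 7))
			continue;	/* only the mon_L3_* domains */
		snprintf(path, sizeof(path), "%s/%s/llc_occupancy",
			 base, de->d_name);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", de->d_name, buf);
		fclose(f);
	}
	closedir(dir);
	return 0;
}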
Note that all events - * in a domain like L3 are grouped into a resource whose domain is L3 - */ - for_each_mon_enabled_rdt_resource(r) { - ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); - if (ret) - goto out_destroy; - } - - return 0; - -out_destroy: - kernfs_remove(kn); - return ret; -} - -/** - * cbm_ensure_valid - Enforce validity on provided CBM - * @_val: Candidate CBM - * @r: RDT resource to which the CBM belongs - * - * The provided CBM represents all cache portions available for use. This - * may be represented by a bitmap that does not consist of contiguous ones - * and thus be an invalid CBM. - * Here the provided CBM is forced to be a valid CBM by only considering - * the first set of contiguous bits as valid and clearing all bits. - * The intention here is to provide a valid default CBM with which a new - * resource group is initialized. The user can follow this with a - * modification to the CBM if the default does not satisfy the - * requirements. - */ -static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) -{ - unsigned int cbm_len = r->cache.cbm_len; - unsigned long first_bit, zero_bit; - unsigned long val = _val; - - if (!val) - return 0; - - first_bit = find_first_bit(&val, cbm_len); - zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - - /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(&val, zero_bit, cbm_len - zero_bit); - return (u32)val; -} - -/* - * Initialize cache resources per RDT domain - * - * Set the RDT domain up to start off with all usable allocations. That is, - * all shareable and unused bits. All-zero CBM is invalid. - */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, - u32 closid) -{ - struct rdt_resource *r_cdp = NULL; - struct rdt_domain *d_cdp = NULL; - u32 used_b = 0, unused_b = 0; - unsigned long tmp_cbm; - enum rdtgrp_mode mode; - u32 peer_ctl, *ctrl; - int i; - - rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); - d->have_new_ctrl = false; - d->new_ctrl = r->cache.shareable_bits; - used_b = r->cache.shareable_bits; - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - if (closid_allocated(i) && i != closid) { - mode = rdtgroup_mode_by_closid(i); - if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - /* - * ctrl values for locksetup aren't relevant - * until the schemata is written, and the mode - * becomes RDT_MODE_PSEUDO_LOCKED. - */ - continue; - /* - * If CDP is active include peer domain's - * usage to ensure there is no overlap - * with an exclusive group. - */ - if (d_cdp) - peer_ctl = d_cdp->ctrl_val[i]; - else - peer_ctl = 0; - used_b |= *ctrl | peer_ctl; - if (mode == RDT_MODE_SHAREABLE) - d->new_ctrl |= *ctrl | peer_ctl; - } - } - if (d->plr && d->plr->cbm > 0) - used_b |= d->plr->cbm; - unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); - unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - d->new_ctrl |= unused_b; - /* - * Force the initial CBM to be valid, user can - * modify the CBM based on system availability. - */ - d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); - /* - * Assign the u32 CBM to an unsigned long to ensure that - * bitmap_weight() does not access out-of-bound memory. - */ - tmp_cbm = d->new_ctrl; - if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); - return -ENOSPC; - } - d->have_new_ctrl = true; - - return 0; -} - -/* - * Initialize cache resources with default values. - * - * A new RDT group is being created on an allocation capable (CAT) - * supporting system. 
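A stand-alone rendering of the cbm_ensure_valid() fix-up above, assuming cbm_len <= 32 (the kernel operates on unsigned long bitmaps; this sketch uses a plain unsigned int):

#include <stdio.h>

/* Keep only the first (lowest) run of contiguous set bits. */
static unsigned int cbm_ensure_valid(unsigned int val, unsigned int cbm_len)
{
	unsigned int first_bit, zero_bit, mask;

	if (!val)
		return 0;

	first_bit = __builtin_ctz(val);		/* lowest set bit */
	zero_bit = first_bit;
	while (zero_bit < cbm_len && (val & (1U << zero_bit)))
		zero_bit++;			/* first zero above it */

	mask = (zero_bit >= 32) ? ~0U : (1U << zero_bit) - 1;
	return val & mask;			/* clear everything above */
}

int main(void)
{
	/* 0xc36 = 1100 0011 0110: keep the lowest run only -> 0x6 */
	printf("%#x\n", cbm_ensure_valid(0xc36, 12));
	return 0;
}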
Set this group up to start off with all usable - * allocations. - * - * If there are no more shareable bits available on any domain then - * the entire allocation will fail. - */ -static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) -{ - struct rdt_domain *d; - int ret; - - list_for_each_entry(d, &r->domains, list) { - ret = __init_one_rdt_domain(d, r, closid); - if (ret < 0) - return ret; - } - - return 0; -} - -/* Initialize MBA resource with default values. */ -static void rdtgroup_init_mba(struct rdt_resource *r) -{ - struct rdt_domain *d; - - list_for_each_entry(d, &r->domains, list) { - d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; - d->have_new_ctrl = true; - } -} - -/* Initialize the RDT group's allocations. */ -static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) -{ - struct rdt_resource *r; - int ret; - - for_each_alloc_enabled_rdt_resource(r) { - if (r->rid == RDT_RESOURCE_MBA) { - rdtgroup_init_mba(r); - } else { - ret = rdtgroup_init_cat(r, rdtgrp->closid); - if (ret < 0) - return ret; - } - - ret = update_domains(r, rdtgrp->closid); - if (ret < 0) { - rdt_last_cmd_puts("Failed to initialize allocations\n"); - return ret; - } - - } - - rdtgrp->mode = RDT_MODE_SHAREABLE; - - return 0; -} - -static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, - const char *name, umode_t mode, - enum rdt_group_type rtype, struct rdtgroup **r) -{ - struct rdtgroup *prdtgrp, *rdtgrp; - struct kernfs_node *kn; - uint files = 0; - int ret; - - prdtgrp = rdtgroup_kn_lock_live(parent_kn); - if (!prdtgrp) { - ret = -ENODEV; - goto out_unlock; - } - - if (rtype == RDTMON_GROUP && - (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { - ret = -EINVAL; - rdt_last_cmd_puts("Pseudo-locking in progress\n"); - goto out_unlock; - } - - /* allocate the rdtgroup. */ - rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); - if (!rdtgrp) { - ret = -ENOSPC; - rdt_last_cmd_puts("Kernel out of memory\n"); - goto out_unlock; - } - *r = rdtgrp; - rdtgrp->mon.parent = prdtgrp; - rdtgrp->type = rtype; - INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); - - /* kernfs creates the directory for rdtgrp */ - kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); - if (IS_ERR(kn)) { - ret = PTR_ERR(kn); - rdt_last_cmd_puts("kernfs create error\n"); - goto out_free_rgrp; - } - rdtgrp->kn = kn; - - /* - * kernfs_remove() will drop the reference count on "kn" which - * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn) call. Take one extra reference here, - * which will be dropped by kernfs_put() in rdtgroup_remove(). - */ - kernfs_get(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) { - rdt_last_cmd_puts("kernfs perm error\n"); - goto out_destroy; - } - - files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); - ret = rdtgroup_add_files(kn, files); - if (ret) { - rdt_last_cmd_puts("kernfs fill error\n"); - goto out_destroy; - } - - if (rdt_mon_capable) { - ret = alloc_rmid(); - if (ret < 0) { - rdt_last_cmd_puts("Out of RMIDs\n"); - goto out_destroy; - } - rdtgrp->mon.rmid = ret; - - ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); - if (ret) { - rdt_last_cmd_puts("kernfs subdir error\n"); - goto out_idfree; - } - } - kernfs_activate(kn); - - /* - * The caller unlocks the parent_kn upon success. 
- */ - return 0; - -out_idfree: - free_rmid(rdtgrp->mon.rmid); -out_destroy: - kernfs_put(rdtgrp->kn); - kernfs_remove(rdtgrp->kn); -out_free_rgrp: - kfree(rdtgrp); -out_unlock: - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) -{ - kernfs_remove(rgrp->kn); - free_rmid(rgrp->mon.rmid); - rdtgroup_remove(rgrp); -} - -/* - * Create a monitor group under "mon_groups" directory of a control - * and monitor group(ctrl_mon). This is a resource group - * to monitor a subset of tasks and cpus in its parent ctrl_mon group. - */ -static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, - const char *name, umode_t mode) -{ - struct rdtgroup *rdtgrp, *prgrp; - int ret; - - ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); - if (ret) - return ret; - - prgrp = rdtgrp->mon.parent; - rdtgrp->closid = prgrp->closid; - - /* - * Add the rdtgrp to the list of rdtgrps the parent - * ctrl_mon group has to track. - */ - list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -/* - * These are rdtgroups created under the root directory. Can be used - * to allocate and monitor resources. - */ -static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, - const char *name, umode_t mode) -{ - struct rdtgroup *rdtgrp; - struct kernfs_node *kn; - u32 closid; - int ret; - - ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); - if (ret) - return ret; - - kn = rdtgrp->kn; - ret = closid_alloc(); - if (ret < 0) { - rdt_last_cmd_puts("Out of CLOSIDs\n"); - goto out_common_fail; - } - closid = ret; - ret = 0; - - rdtgrp->closid = closid; - ret = rdtgroup_init_alloc(rdtgrp); - if (ret < 0) - goto out_id_free; - - list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); - - if (rdt_mon_capable) { - /* - * Create an empty mon_groups directory to hold the subset - * of tasks and cpus to monitor. - */ - ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); - if (ret) { - rdt_last_cmd_puts("kernfs subdir error\n"); - goto out_del_list; - } - } - - goto out_unlock; - -out_del_list: - list_del(&rdtgrp->rdtgroup_list); -out_id_free: - closid_free(closid); -out_common_fail: - mkdir_rdt_prepare_clean(rdtgrp); -out_unlock: - rdtgroup_kn_unlock(parent_kn); - return ret; -} - -/* - * We allow creating mon groups only with in a directory called "mon_groups" - * which is present in every ctrl_mon group. Check if this is a valid - * "mon_groups" directory. - * - * 1. The directory should be named "mon_groups". - * 2. The mon group itself should "not" be named "mon_groups". - * This makes sure "mon_groups" directory always has a ctrl_mon group - * as parent. - */ -static bool is_mon_groups(struct kernfs_node *kn, const char *name) -{ - return (!strcmp(kn->name, "mon_groups") && - strcmp(name, "mon_groups")); -} - -static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, - umode_t mode) -{ - /* Do not accept '\n' to avoid unparsable situation. */ - if (strchr(name, '\n')) - return -EINVAL; - - /* - * If the parent directory is the root directory and RDT - * allocation is supported, add a control and monitoring - * subdirectory - */ - if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) - return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); - - /* - * If RDT monitoring is supported and the parent directory is a valid - * "mon_groups" directory, add a monitoring subdirectory. 
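The two naming rules for is_mon_groups() can be tested in isolation; the parent's kn->name is modeled as a plain string in this sketch:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Valid only when the parent is "mon_groups" and the child is not. */
static bool is_mon_groups(const char *parent_name, const char *name)
{
	return !strcmp(parent_name, "mon_groups") &&
	       strcmp(name, "mon_groups");
}

int main(void)
{
	printf("%d\n", is_mon_groups("mon_groups", "m1"));	    /* 1 */
	printf("%d\n", is_mon_groups("mon_groups", "mon_groups")); /* 0 */
	printf("%d\n", is_mon_groups("grp1", "m1"));		    /* 0 */
	return 0;
}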
- */ - if (rdt_mon_capable && is_mon_groups(parent_kn, name)) - return rdtgroup_mkdir_mon(parent_kn, name, mode); - - return -EPERM; -} - -static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) -{ - struct rdtgroup *prdtgrp = rdtgrp->mon.parent; - int cpu; - - /* Give any tasks back to the parent group */ - rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask); - - /* Update per cpu rmid of the moved CPUs first */ - for_each_cpu(cpu, &rdtgrp->cpu_mask) - per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid; - /* - * Update the MSR on moved CPUs and CPUs which have moved - * task running on them. - */ - cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); - update_closid_rmid(tmpmask, NULL); - - rdtgrp->flags = RDT_DELETED; - free_rmid(rdtgrp->mon.rmid); - - /* - * Remove the rdtgrp from the parent ctrl_mon group's list - */ - WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); - list_del(&rdtgrp->mon.crdtgrp_list); - - kernfs_remove(rdtgrp->kn); - - return 0; -} - -static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) -{ - rdtgrp->flags = RDT_DELETED; - list_del(&rdtgrp->rdtgroup_list); - - kernfs_remove(rdtgrp->kn); - return 0; -} - -static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) -{ - int cpu; - - /* Give any tasks back to the default group */ - rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); - - /* Give any CPUs back to the default group */ - cpumask_or(&rdtgroup_default.cpu_mask, - &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); - - /* Update per cpu closid and rmid of the moved CPUs first */ - for_each_cpu(cpu, &rdtgrp->cpu_mask) { - per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid; - per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid; - } - - /* - * Update the MSR on moved CPUs and CPUs which have moved - * task running on them. - */ - cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); - update_closid_rmid(tmpmask, NULL); - - closid_free(rdtgrp->closid); - free_rmid(rdtgrp->mon.rmid); - - rdtgroup_ctrl_remove(rdtgrp); - - /* - * Free all the child monitor group rmids. - */ - free_all_child_rdtgrp(rdtgrp); - - return 0; -} - -static int rdtgroup_rmdir(struct kernfs_node *kn) -{ - struct kernfs_node *parent_kn = kn->parent; - struct rdtgroup *rdtgrp; - cpumask_var_t tmpmask; - int ret = 0; - - if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) - return -ENOMEM; - - rdtgrp = rdtgroup_kn_lock_live(kn); - if (!rdtgrp) { - ret = -EPERM; - goto out; - } + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + hw_dom = resctrl_to_arch_ctrl_dom(d); - /* - * If the rdtgroup is a ctrl_mon group and parent directory - * is the root directory, remove the ctrl_mon group. - * - * If the rdtgroup is a mon group and parent directory - * is a valid "mon_groups" directory, remove the mon group. 
- */
- if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
- rdtgrp != &rdtgroup_default) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- ret = rdtgroup_ctrl_remove(rdtgrp);
- } else {
- ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
- }
- } else if (rdtgrp->type == RDTMON_GROUP &&
- is_mon_groups(parent_kn, kn->name)) {
- ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
- } else {
- ret = -EPERM;
+ for (i = 0; i < hw_res->num_closid; i++)
+ hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
+ msr_param.dom = d;
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
 }
-out:
- rdtgroup_kn_unlock(kn);
- free_cpumask_var(tmpmask);
- return ret;
-}
-
-static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
-{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
- seq_puts(seq, ",cdp");
-
- if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
- seq_puts(seq, ",cdpl2");
-
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
- seq_puts(seq, ",mba_MBps");
-
- return 0;
-}
-
-static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
- .mkdir = rdtgroup_mkdir,
- .rmdir = rdtgroup_rmdir,
- .show_options = rdtgroup_show_options,
-};
-
-static int __init rdtgroup_setup_root(void)
-{
- int ret;
-
- rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
- KERNFS_ROOT_CREATE_DEACTIVATED |
- KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
- &rdtgroup_default);
- if (IS_ERR(rdt_root))
- return PTR_ERR(rdt_root);
-
- mutex_lock(&rdtgroup_mutex);
-
- rdtgroup_default.closid = 0;
- rdtgroup_default.mon.rmid = 0;
- rdtgroup_default.type = RDTCTRL_GROUP;
- INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
-
- list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
-
- ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
- if (ret) {
- kernfs_destroy_root(rdt_root);
- goto out;
- }
-
- rdtgroup_default.kn = rdt_root->kn;
- kernfs_activate(rdtgroup_default.kn);
-
-out:
- mutex_unlock(&rdtgroup_mutex);
-
- return ret;
-}
-
-/*
- * rdtgroup_init - rdtgroup initialization
- *
- * Setup resctrl file system including set up root, create mount point,
- * register rdtgroup filesystem, and initialize files under root directory.
- *
- * Return: 0 on success or -errno
- */
-int __init rdtgroup_init(void)
-{
- int ret = 0;
-
- seq_buf_init(&last_cmd_status, last_cmd_status_buf,
- sizeof(last_cmd_status_buf));
-
- ret = rdtgroup_setup_root();
- if (ret)
- return ret;
-
- ret = sysfs_create_mount_point(fs_kobj, "resctrl");
- if (ret)
- goto cleanup_root;
-
- ret = register_filesystem(&rdt_fs_type);
- if (ret)
- goto cleanup_mountpoint;
-
- /*
- * Adding the resctrl debugfs directory here may not be ideal since
- * it would let the resctrl debugfs directory appear on the debugfs
- * filesystem before the resctrl filesystem is mounted.
- * It may also be ok since that would enable debugging of RDT before
- * resctrl is mounted.
- * The reason why the debugfs directory is created here and not in
- * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
- * during the debugfs directory creation also &sb->s_type->i_mutex_key
- * (the lockdep class of inode->i_rwsem). Other filesystem
- * interactions (eg. SyS_getdents) have the lock ordering:
- * &sb->s_type->i_mutex_key --> &mm->mmap_lock
- * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
- * is taken, thus creating dependency:
- * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
- * issues considering the other two lock dependencies.
- * By creating the debugfs directory here we avoid a dependency
- * that may cause deadlock (even though file operations cannot
- * occur until the filesystem is mounted, but I do not know how to
- * tell lockdep that).
- */
- debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
-
- return 0;
-
-cleanup_mountpoint:
- sysfs_remove_mount_point(fs_kobj, "resctrl");
-cleanup_root:
- kernfs_destroy_root(rdt_root);
-
- return ret;
-}
-
-void __exit rdtgroup_exit(void)
-{
- debugfs_remove_recursive(debugfs_resctrl);
- unregister_filesystem(&rdt_fs_type);
- sysfs_remove_mount_point(fs_kobj, "resctrl");
- kernfs_destroy_root(rdt_root);
+ return;
 }
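
As a minimal standalone sketch of the lock-ordering argument in the removed rdtgroup_init() comment above (plain userspace C, not kernel code; the lock names are labels only, and the three edges simply transcribe the orderings the comment lists), a cycle check makes the potential deadlock explicit:

#include <stdio.h>
#include <stdbool.h>

enum lock { I_MUTEX_KEY, MMAP_LOCK, RDTGROUP_MUTEX, NLOCKS };

static const char * const lock_name[NLOCKS] = {
	"&sb->s_type->i_mutex_key",
	"&mm->mmap_lock",
	"rdtgroup_mutex",
};

/* edge[a][b]: lock a is held while lock b is acquired */
static bool edge[NLOCKS][NLOCKS];

/* Depth-first search: is there a held-while-acquiring path from -> to? */
static bool reaches(int from, int to, bool seen[NLOCKS])
{
	if (edge[from][to])
		return true;
	for (int mid = 0; mid < NLOCKS; mid++) {
		if (!edge[from][mid] || seen[mid])
			continue;
		seen[mid] = true;
		if (reaches(mid, to, seen))
			return true;
	}
	return false;
}

int main(void)
{
	/* SyS_getdents: &sb->s_type->i_mutex_key --> &mm->mmap_lock */
	edge[I_MUTEX_KEY][MMAP_LOCK] = true;
	/* mmap(): &mm->mmap_lock --> rdtgroup_mutex */
	edge[MMAP_LOCK][RDTGROUP_MUTEX] = true;
	/*
	 * Creating the debugfs directory while holding rdtgroup_mutex
	 * would add the third edge and close the cycle:
	 */
	edge[RDTGROUP_MUTEX][I_MUTEX_KEY] = true;

	for (int l = 0; l < NLOCKS; l++) {
		bool seen[NLOCKS] = { false };

		if (reaches(l, l, seen))
			printf("cycle through %s => possible deadlock\n",
			       lock_name[l]);
	}
	return 0;
}

Creating the directory in rdtgroup_init(), before the filesystem can be mounted and outside rdtgroup_mutex, is what keeps the third edge from ever being added.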
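
Similarly, a hedged userspace sketch of the directory semantics the removed rdtgroup_mkdir()/rdtgroup_rmdir() handlers implemented (the mount point /sys/fs/resctrl matches the sysfs mount point created above; the group names "g1" and "m1" are invented for illustration):

#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	/* Parent is the resctrl root: rdtgroup_mkdir_ctrl_mon() runs. */
	if (mkdir("/sys/fs/resctrl/g1", 0755))
		perror("mkdir g1");

	/* Parent is a "mon_groups" directory: rdtgroup_mkdir_mon() runs. */
	if (mkdir("/sys/fs/resctrl/g1/mon_groups/m1", 0755))
		perror("mkdir m1");

	/*
	 * A mon group must not itself be named "mon_groups":
	 * is_mon_groups() rejects it and mkdir() fails with EPERM.
	 */
	if (mkdir("/sys/fs/resctrl/g1/mon_groups/mon_groups", 0755))
		perror("mkdir mon_groups (expected to fail)");

	/* rdtgroup_rmdir_mon(): tasks return to the parent ctrl_mon group. */
	rmdir("/sys/fs/resctrl/g1/mon_groups/m1");

	/* rdtgroup_rmdir_ctrl(): tasks and CPUs return to the default group. */
	rmdir("/sys/fs/resctrl/g1");
	return 0;
}

The asymmetry mirrors the removed code: rdtgroup_rmdir_mon() hands tasks back to the parent ctrl_mon group and only frees the RMID, while rdtgroup_rmdir_ctrl() returns tasks and CPUs to rdtgroup_default and frees both the CLOSID and the RMID.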