summary | refs | log | tree | commit | diff
path: root/arch/x86/kernel/cpu/resctrl/rdtgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/rdtgroup.c')
-rw-r--r-- arch/x86/kernel/cpu/resctrl/rdtgroup.c 672
1 file changed, 457 insertions, 215 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 011e17efb1a6..93ec829015f1 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
@@ -58,6 +57,12 @@ static struct kernfs_node *kn_mongrp;
/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;
+/*
+ * Used to store the max resource name width to display the schemata names in
+ * a tabular format.
+ */
+int max_name_width;
+
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];
@@ -66,6 +71,15 @@ static void rdtgroup_destroy_root(void);
struct dentry *debugfs_resctrl;
+/*
+ * Memory bandwidth monitoring event to use for the default CTRL_MON group
+ * and each new CTRL_MON group created by the user. Only relevant when
+ * the filesystem is mounted with the "mba_MBps" option so it does not
+ * matter that it remains uninitialized on systems that do not support
+ * the "mba_MBps" option.
+ */
+enum resctrl_event_id mba_mbps_default_event;
+
static bool resctrl_debug;
void rdt_last_cmd_clear(void)
@@ -92,17 +106,29 @@ void rdt_last_cmd_printf(const char *fmt, ...)
void rdt_staged_configs_clear(void)
{
+ struct rdt_ctrl_domain *dom;
struct rdt_resource *r;
- struct rdt_domain *dom;
lockdep_assert_held(&rdtgroup_mutex);
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
+static bool resctrl_is_mbm_enabled(void)
+{
+ return (resctrl_arch_is_mbm_total_enabled() ||
+ resctrl_arch_is_mbm_local_enabled());
+}
+
+static bool resctrl_is_mbm_event(int e)
+{
+ return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -149,7 +175,8 @@ static int closid_alloc(void)
lockdep_assert_held(&rdtgroup_mutex);
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
+ resctrl_arch_is_llc_occupancy_enabled()) {
cleanest_closid = resctrl_find_cleanest_closid();
if (cleanest_closid < 0)
return cleanest_closid;
@@ -317,7 +344,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
- mask = &rdtgrp->plr->d->cpu_mask;
+ mask = &rdtgrp->plr->d->hdr.cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
@@ -340,13 +367,13 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
* from update_closid_rmid() is protected against __switch_to() because
* preemption is disabled.
*/
-static void update_cpu_closid_rmid(void *info)
+void resctrl_arch_sync_cpu_closid_rmid(void *info)
{
- struct rdtgroup *r = info;
+ struct resctrl_cpu_defaults *r = info;
if (r) {
this_cpu_write(pqr_state.default_closid, r->closid);
- this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+ this_cpu_write(pqr_state.default_rmid, r->rmid);
}
/*
@@ -361,11 +388,20 @@ static void update_cpu_closid_rmid(void *info)
* Update the PQR_ASSOC MSR on all cpus in @cpu_mask,
*
* Per task closids/rmids must have been set up before calling this function.
+ * @r may be NULL.
*/
static void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
- on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
+ struct resctrl_cpu_defaults defaults, *p = NULL;
+
+ if (r) {
+ defaults.closid = r->closid;
+ defaults.rmid = r->mon.rmid;
+ p = &defaults;
+ }
+
+ on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
}
static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
@@ -908,14 +944,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
continue;
seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
- rdtg->kn->name);
+ rdt_kn_name(rdtg->kn));
seq_puts(s, "mon:");
list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
mon.crdtgrp_list) {
if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
crg->mon.rmid))
continue;
- seq_printf(s, "%s", crg->kn->name);
+ seq_printf(s, "%s", rdt_kn_name(crg->kn));
break;
}
seq_putc(s, '\n');
@@ -948,10 +984,20 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
return 0;
}
+static void *rdt_kn_parent_priv(struct kernfs_node *kn)
+{
+ /*
+ * The parent pointer is only valid within RCU section since it can be
+ * replaced.
+ */
+ guard(rcu)();
+ return rcu_dereference(kn->__parent)->priv;
+}
+
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
seq_printf(seq, "%u\n", s->num_closid);
return 0;
@@ -960,17 +1006,17 @@ static int rdt_num_closids_show(struct kernfs_open_file *of,
static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
- seq_printf(seq, "%x\n", r->default_ctrl);
+ seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
return 0;
}
static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
@@ -980,7 +1026,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
static int rdt_shareable_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%x\n", r->cache.shareable_bits);
@@ -1004,7 +1050,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of,
static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
/*
* Use unsigned long even though only 32 bits are used to ensure
* test_bit() is used safely.
@@ -1012,7 +1058,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_resource *r = s->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
@@ -1021,12 +1067,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
exclusive = 0;
- seq_printf(seq, "%d=", dom->id);
+ seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
@@ -1086,7 +1132,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
static int rdt_min_bw_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.min_bw);
@@ -1096,7 +1142,7 @@ static int rdt_min_bw_show(struct kernfs_open_file *of,
static int rdt_num_rmids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
seq_printf(seq, "%d\n", r->num_rmid);
@@ -1106,7 +1152,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of,
static int rdt_mon_features_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
struct mon_evt *mevt;
list_for_each_entry(mevt, &r->evt_list, list) {
@@ -1121,7 +1167,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
static int rdt_bw_gran_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.bw_gran);
@@ -1131,7 +1177,7 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of,
static int rdt_delay_linear_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.delay_linear);
@@ -1149,13 +1195,22 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
- if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
+ switch (r->membw.throttle_mode) {
+ case THREAD_THROTTLE_PER_THREAD:
seq_puts(seq, "per-thread\n");
- else
+ return 0;
+ case THREAD_THROTTLE_MAX:
seq_puts(seq, "max\n");
+ return 0;
+ case THREAD_THROTTLE_UNDEFINED:
+ seq_puts(seq, "undefined\n");
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
return 0;
}
@@ -1214,7 +1269,7 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct resctrl_schema *s = of->kn->parent->priv;
+ struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
@@ -1243,7 +1298,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid,
enum resctrl_conf_type type, bool exclusive)
{
@@ -1298,7 +1353,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -1329,10 +1384,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct rdt_ctrl_domain *d;
struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
- struct rdt_domain *d;
u32 ctrl;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -1343,7 +1398,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1417,7 +1472,8 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
goto out;
}
rdtgrp->mode = RDT_MODE_EXCLUSIVE;
- } else if (!strcmp(buf, "pseudo-locksetup")) {
+ } else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
+ !strcmp(buf, "pseudo-locksetup")) {
ret = rdtgroup_locksetup_enter(rdtgrp);
if (ret)
goto out;
@@ -1448,20 +1504,19 @@ out:
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, unsigned long cbm)
+ struct rdt_ctrl_domain *d, unsigned long cbm)
{
- struct cpu_cacheinfo *ci;
unsigned int size = 0;
- int num_b, i;
+ struct cacheinfo *ci;
+ int num_b;
+
+ if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
+ return size;
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
- ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->cache_level) {
- size = ci->info_list[i].size / r->cache.cbm_len * num_b;
- break;
- }
- }
+ ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
+ if (ci)
+ size = ci->size / r->cache.cbm_len * num_b;
return size;
}
@@ -1477,9 +1532,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
{
struct resctrl_schema *schema;
enum resctrl_conf_type type;
+ struct rdt_ctrl_domain *d;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
unsigned int size;
int ret = 0;
u32 closid;
@@ -1503,7 +1558,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
}
goto out;
}
@@ -1515,7 +1570,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1533,7 +1588,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
- seq_printf(s, "%d=%u", d->id, size);
+ seq_printf(s, "%d=%u", d->hdr.id, size);
sep = true;
}
seq_putc(s, '\n');
@@ -1545,11 +1600,6 @@ out:
return ret;
}
-struct mon_config_info {
- u32 evtid;
- u32 mon_config;
-};
-
#define INVALID_CONFIG_INDEX UINT_MAX
/**
@@ -1574,46 +1624,49 @@ static inline unsigned int mon_event_config_index_get(u32 evtid)
}
}
-static void mon_event_config_read(void *info)
+void resctrl_arch_mon_event_config_read(void *_config_info)
{
- struct mon_config_info *mon_info = info;
+ struct resctrl_mon_config_info *config_info = _config_info;
unsigned int index;
u64 msrval;
- index = mon_event_config_index_get(mon_info->evtid);
+ index = mon_event_config_index_get(config_info->evtid);
if (index == INVALID_CONFIG_INDEX) {
- pr_warn_once("Invalid event id %d\n", mon_info->evtid);
+ pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
/* Report only the valid event configuration bits */
- mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
+ config_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}
-static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
{
- smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+ smp_call_function_any(&mon_info->d->hdr.cpu_mask,
+ resctrl_arch_mon_event_config_read, mon_info, 1);
}
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
- struct mon_config_info mon_info = {0};
- struct rdt_domain *dom;
+ struct resctrl_mon_config_info mon_info;
+ struct rdt_mon_domain *dom;
bool sep = false;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
- memset(&mon_info, 0, sizeof(struct mon_config_info));
+ memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
+ mon_info.r = r;
+ mon_info.d = dom;
mon_info.evtid = evtid;
- mondata_config_read(dom, &mon_info);
+ mondata_config_read(&mon_info);
- seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
}
seq_puts(s, "\n");
@@ -1627,7 +1680,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
@@ -1637,37 +1690,39 @@ static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
return 0;
}
-static void mon_event_config_write(void *info)
+void resctrl_arch_mon_event_config_write(void *_config_info)
{
- struct mon_config_info *mon_info = info;
+ struct resctrl_mon_config_info *config_info = _config_info;
unsigned int index;
- index = mon_event_config_index_get(mon_info->evtid);
+ index = mon_event_config_index_get(config_info->evtid);
if (index == INVALID_CONFIG_INDEX) {
- pr_warn_once("Invalid event id %d\n", mon_info->evtid);
+ pr_warn_once("Invalid event id %d\n", config_info->evtid);
return;
}
- wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
+ wrmsr(MSR_IA32_EVT_CFG_BASE + index, config_info->mon_config, 0);
}
static void mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+ struct rdt_mon_domain *d, u32 evtid, u32 val)
{
- struct mon_config_info mon_info = {0};
+ struct resctrl_mon_config_info mon_info = {0};
/*
* Read the current config value first. If both are the same then
* no need to write it again.
*/
+ mon_info.r = r;
+ mon_info.d = d;
mon_info.evtid = evtid;
- mondata_config_read(d, &mon_info);
+ mondata_config_read(&mon_info);
if (mon_info.mon_config == val)
return;
@@ -1679,7 +1734,7 @@ static void mbm_config_write_domain(struct rdt_resource *r,
* are scoped at the domain level. Writing any of these MSRs
* on one CPU is observed by all the CPUs in the domain.
*/
- smp_call_function_any(&d->cpu_mask, mon_event_config_write,
+ smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
&mon_info, 1);
/*
@@ -1696,10 +1751,9 @@ static void mbm_config_write_domain(struct rdt_resource *r,
static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
@@ -1723,14 +1777,14 @@ next:
}
/* Value from user cannot be more than the supported set of events */
- if ((val & hw_res->mbm_cfg_mask) != val) {
+ if ((val & r->mbm_cfg_mask) != val) {
rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
- hw_res->mbm_cfg_mask);
+ r->mbm_cfg_mask);
return -EINVAL;
}
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
mbm_config_write_domain(r, d, evtid, val);
goto next;
}
@@ -1743,7 +1797,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
int ret;
/* Valid input requires a trailing newline */
@@ -1769,7 +1823,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
int ret;
/* Valid input requires a trailing newline */
@@ -1944,6 +1998,13 @@ static struct rftype res_common_files[] = {
.fflags = RFTYPE_CTRL_BASE,
},
{
+ .name = "mba_MBps_event",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_mba_mbps_event_write,
+ .seq_show = rdtgroup_mba_mbps_event_show,
+ },
+ {
.name = "mode",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -2022,24 +2083,35 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
return NULL;
}
-void __init thread_throttle_mode_init(void)
+static void thread_throttle_mode_init(void)
{
- struct rftype *rft;
+ enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
+ struct rdt_resource *r_mba, *r_smba;
+
+ r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
+ if (r_mba->alloc_capable &&
+ r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_mba->membw.throttle_mode;
- rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
- if (!rft)
+ r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
+ if (r_smba->alloc_capable &&
+ r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
+ throttle_mode = r_smba->membw.throttle_mode;
+
+ if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
return;
- rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
+ resctrl_file_fflags_init("thread_throttle_mode",
+ RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
}
-void __init mbm_config_rftype_init(const char *config)
+void resctrl_file_fflags_init(const char *config, unsigned long fflags)
{
struct rftype *rft;
rft = rdtgroup_get_rftype_by_name(config);
if (rft)
- rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
+ rft->fflags = fflags;
}
/**
@@ -2161,6 +2233,20 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
return ret;
}
+static unsigned long fflags_from_resource(struct rdt_resource *r)
+{
+ switch (r->rid) {
+ case RDT_RESOURCE_L3:
+ case RDT_RESOURCE_L2:
+ return RFTYPE_RES_CACHE;
+ case RDT_RESOURCE_MBA:
+ case RDT_RESOURCE_SMBA:
+ return RFTYPE_RES_MB;
+ }
+
+ return WARN_ON_ONCE(1);
+}
+
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
struct resctrl_schema *s;
@@ -2181,14 +2267,14 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
/* loop over enabled controls, these are all alloc_capable */
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
- fflags = r->fflags | RFTYPE_CTRL_INFO;
+ fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
if (ret)
goto out_destroy;
}
for_each_mon_capable_rdt_resource(r) {
- fflags = r->fflags | RFTYPE_MON_INFO;
+ fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
sprintf(name, "%s_MON", r->name);
ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
if (ret)
@@ -2252,15 +2338,15 @@ static void l2_qos_cfg_update(void *arg)
static inline bool is_mba_linear(void)
{
- return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
+ return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
}
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
+ struct rdt_ctrl_domain *d;
struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
- struct rdt_domain *d;
int cpu;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2277,14 +2363,14 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;
r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, list) {
+ list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
- for_each_cpu(cpu, &d->cpu_mask)
+ for_each_cpu(cpu, &d->hdr.cpu_mask)
cpumask_set_cpu(cpu, cpu_mask);
else
/* Pick one CPU from each domain instance to update MSR */
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
}
/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
@@ -2310,10 +2396,10 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}
-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
u32 num_closid = resctrl_arch_get_num_closid(r);
- int cpu = cpumask_any(&d->cpu_mask);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
int i;
d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
@@ -2328,7 +2414,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
}
static void mba_sc_domain_destroy(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
kfree(d->mbps_val);
d->mbps_val = NULL;
@@ -2336,14 +2422,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r,
/*
* MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale.
+ * MBM is supported and MBA is in linear scale,
+ * and the MBM monitor scope is the same as MBA
+ * control scope.
*/
static bool supports_mba_mbps(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- return (is_mbm_local_enabled() &&
- r->alloc_capable && is_mba_linear());
+ return (resctrl_is_mbm_enabled() &&
+ r->alloc_capable && is_mba_linear() &&
+ r->ctrl_scope == rmbm->mon_scope);
}
/*
@@ -2352,9 +2442,10 @@ static bool supports_mba_mbps(void)
*/
static int set_mba_sc(bool mba_sc)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
+ unsigned long fflags;
int i;
if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@@ -2362,11 +2453,16 @@ static int set_mba_sc(bool mba_sc)
r->membw.mba_sc = mba_sc;
- list_for_each_entry(d, &r->domains, list) {
+ rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
+
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
+ fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
+ resctrl_file_fflags_init("mba_MBps_event", fflags);
+
return 0;
}
@@ -2427,12 +2523,13 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
* resource. "info" and its subdirectories don't
* have rdtgroup structures, so return NULL here.
*/
- if (kn == kn_info || kn->parent == kn_info)
+ if (kn == kn_info ||
+ rcu_access_pointer(kn->__parent) == kn_info)
return NULL;
else
return kn->priv;
} else {
- return kn->parent->priv;
+ return rdt_kn_parent_priv(kn);
}
}
@@ -2583,6 +2680,20 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type
if (cl > max_name_width)
max_name_width = cl;
+ switch (r->schema_fmt) {
+ case RESCTRL_SCHEMA_BITMAP:
+ s->fmt_str = "%d=%x";
+ break;
+ case RESCTRL_SCHEMA_RANGE:
+ s->fmt_str = "%d=%u";
+ break;
+ }
+
+ if (WARN_ON_ONCE(!s->fmt_str)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
INIT_LIST_HEAD(&s->list);
list_add(&s->list, &resctrl_schema_all);
@@ -2626,7 +2737,7 @@ static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
unsigned long flags = RFTYPE_CTRL_BASE;
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
struct rdt_resource *r;
int ret;
@@ -2699,9 +2810,9 @@ static int rdt_get_tree(struct fs_context *fc)
if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
resctrl_mounted = true;
- if (is_mbm_enabled()) {
- r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, list)
+ if (resctrl_is_mbm_enabled()) {
+ r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
@@ -2751,6 +2862,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
+ const char *msg;
int opt;
opt = fs_parse(fc, rdt_fs_parameters, param, &result);
@@ -2765,8 +2877,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
+ msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
- return -EINVAL;
+ return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
return 0;
case Opt_debug:
@@ -2808,44 +2921,36 @@ static int rdt_init_fs_context(struct fs_context *fc)
return 0;
}
-static int reset_all_ctrls(struct rdt_resource *r)
+void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
- cpumask_var_t cpu_mask;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
msr_param.res = r;
msr_param.low = 0;
msr_param.high = hw_res->num_closid;
/*
* Disable resource control for this resource by setting all
- * CBMs in all domains to the maximum mask value. Pick one CPU
+ * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, list) {
- hw_dom = resctrl_to_arch_dom(d);
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
for (i = 0; i < hw_res->num_closid; i++)
- hw_dom->ctrl_val[i] = r->default_ctrl;
+ hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
+ msr_param.dom = d;
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
- /* Update CBM on all the CPUs in cpu_mask */
- on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
-
- free_cpumask_var(cpu_mask);
-
- return 0;
+ return;
}
/*
@@ -2964,9 +3069,10 @@ static void rdt_kill_sb(struct super_block *sb)
rdt_disable_ctx();
- /*Put everything back to default values. */
+ /* Put everything back to default values. */
for_each_alloc_capable_rdt_resource(r)
- reset_all_ctrls(r);
+ resctrl_arch_reset_all_ctrls(r);
+
rmdir_all_sub();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
@@ -3010,62 +3116,126 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
return ret;
}
+static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get(pkn, name);
+ if (!kn)
+ return;
+ kernfs_put(kn);
+
+ if (kn->dir.subdirs <= 1)
+ kernfs_remove(kn);
+ else
+ kernfs_remove_by_name(kn, subname);
+}
+
/*
* Remove all subdirectories of mon_data of ctrl_mon groups
- * and monitor groups with given domain id.
+ * and monitor groups for the given domain.
+ * Remove files and directories containing "sum" of domain data
+ * when last domain being summed is removed.
*/
static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- unsigned int dom_id)
+ struct rdt_mon_domain *d)
{
struct rdtgroup *prgrp, *crgrp;
+ char subname[32];
+ bool snc_mode;
char name[32];
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ if (snc_mode)
+ sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
+
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- sprintf(name, "mon_%s_%02d", r->name, dom_id);
- kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
- kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
}
}
-static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_domain *d,
- struct rdt_resource *r, struct rdtgroup *prgrp)
+static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp,
+ bool do_sum)
{
+ struct rmid_read rr = {0};
union mon_data_bits priv;
- struct kernfs_node *kn;
struct mon_evt *mevt;
- struct rmid_read rr;
- char name[32];
int ret;
- sprintf(name, "mon_%s_%02d", r->name, d->id);
- /* create the directory */
- kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
- goto out_destroy;
-
- if (WARN_ON(list_empty(&r->evt_list))) {
- ret = -EPERM;
- goto out_destroy;
- }
+ if (WARN_ON(list_empty(&r->evt_list)))
+ return -EPERM;
priv.u.rid = r->rid;
- priv.u.domid = d->id;
+ priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
+ priv.u.sum = do_sum;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
ret = mon_addfile(kn, mevt->name, priv.priv);
if (ret)
+ return ret;
+
+ if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
+ mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
+ }
+
+ return 0;
+}
+
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+ struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+ struct kernfs_node *kn, *ckn;
+ char name[32];
+ bool snc_mode;
+ int ret = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ kn = kernfs_find_and_get(parent_kn, name);
+ if (kn) {
+ /*
+ * rdtgroup_mutex will prevent this directory from being
+ * removed. No need to keep holding this reference.
+ */
+ kernfs_put(kn);
+ } else {
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+ ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
+ if (ret)
+ goto out_destroy;
+ }
+
+ if (snc_mode) {
+ sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
+ ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(ckn)) {
+ ret = -EINVAL;
+ goto out_destroy;
+ }
+
+ ret = rdtgroup_kn_set_ugid(ckn);
+ if (ret)
goto out_destroy;
- if (is_mbm_event(mevt->evtid))
- mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
+ ret = mon_add_all_files(ckn, d, r, prgrp, false);
+ if (ret)
+ goto out_destroy;
}
+
kernfs_activate(kn);
return 0;
@@ -3079,7 +3249,7 @@ out_destroy:
* and "monitor" groups with given domain id.
*/
static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_mon_domain *d)
{
struct kernfs_node *parent_kn;
struct rdtgroup *prgrp, *crgrp;
@@ -3101,13 +3271,13 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_resource *r,
struct rdtgroup *prgrp)
{
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
int ret;
- /* Walking r->domains, ensure it can't race with cpuhp */
+ /* Walking r->mon_domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3206,7 +3376,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
+static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
u32 closid)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -3266,7 +3436,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
return -ENOSPC;
}
cfg->have_new_ctrl = true;
@@ -3286,10 +3456,10 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int ret;
- list_for_each_entry(d, &s->res->domains, list) {
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3302,16 +3472,16 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
}
cfg = &d->staged_config[CDP_NONE];
- cfg->new_ctrl = r->default_ctrl;
+ cfg->new_ctrl = resctrl_get_default_ctrl(r);
cfg->have_new_ctrl = true;
}
}
@@ -3562,6 +3732,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
}
+ if (is_mba_sc(NULL))
+ rdtgrp->mba_mbps_event = mba_mbps_default_event;
}
goto out_unlock;
@@ -3591,7 +3763,7 @@ out_unlock:
*/
static bool is_mon_groups(struct kernfs_node *kn, const char *name)
{
- return (!strcmp(kn->name, "mon_groups") &&
+ return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
strcmp(name, "mon_groups"));
}
@@ -3623,14 +3795,21 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+ u32 closid, rmid;
int cpu;
/* Give any tasks back to the parent group */
rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
- /* Update per cpu rmid of the moved CPUs first */
+ /*
+ * Update per cpu closid/rmid of the moved CPUs first.
+ * Note: the closid will not change, but the arch code still needs it.
+ */
+ closid = prdtgrp->closid;
+ rmid = prdtgrp->mon.rmid;
for_each_cpu(cpu, &rdtgrp->cpu_mask)
- per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
+
/*
* Update the MSR on moved CPUs and CPUs which have moved
* task running on them.
@@ -3663,6 +3842,7 @@ static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
+ u32 closid, rmid;
int cpu;
/* Give any tasks back to the default group */
@@ -3673,10 +3853,10 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
/* Update per cpu closid and rmid of the moved CPUs first */
- for_each_cpu(cpu, &rdtgrp->cpu_mask) {
- per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
- per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
- }
+ closid = rdtgroup_default.closid;
+ rmid = rdtgroup_default.mon.rmid;
+ for_each_cpu(cpu, &rdtgrp->cpu_mask)
+ resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
/*
* Update the MSR on moved CPUs and CPUs which have moved
@@ -3698,9 +3878,18 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
return 0;
}
+static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
+{
+ /*
+ * Return the parent node of @kn, read from kn->__parent through
+ * rcu_dereference_check().
+ *
+ * Valid within the RCU section it was obtained or while rdtgroup_mutex
+ * is held.
+ */
+ return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
+}
+
static int rdtgroup_rmdir(struct kernfs_node *kn)
{
- struct kernfs_node *parent_kn = kn->parent;
+ struct kernfs_node *parent_kn;
struct rdtgroup *rdtgrp;
cpumask_var_t tmpmask;
int ret = 0;
@@ -3713,6 +3902,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
ret = -EPERM;
goto out;
}
+ parent_kn = rdt_kn_parent(kn);
/*
* If the rdtgroup is a ctrl_mon group and parent directory
@@ -3730,7 +3920,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
}
} else if (rdtgrp->type == RDTMON_GROUP &&
- is_mon_groups(parent_kn, kn->name)) {
+ is_mon_groups(parent_kn, rdt_kn_name(kn))) {
ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
} else {
ret = -EPERM;
@@ -3781,6 +3971,7 @@ static void mongrp_reparent(struct rdtgroup *rdtgrp,
static int rdtgroup_rename(struct kernfs_node *kn,
struct kernfs_node *new_parent, const char *new_name)
{
+ struct kernfs_node *kn_parent;
struct rdtgroup *new_prdtgrp;
struct rdtgroup *rdtgrp;
cpumask_var_t tmpmask;
@@ -3815,8 +4006,9 @@ static int rdtgroup_rename(struct kernfs_node *kn,
goto out;
}
- if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
- !is_mon_groups(kn->parent, kn->name)) {
+ kn_parent = rdt_kn_parent(kn);
+ if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
+ !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
rdt_last_cmd_puts("Source must be a MON group\n");
ret = -EPERM;
goto out;
@@ -3877,7 +4069,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
+ if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
seq_puts(seq, ",mba_MBps");
if (resctrl_debug)
@@ -3928,33 +4120,37 @@ static void __init rdtgroup_setup_default(void)
mutex_unlock(&rdtgroup_mutex);
}
-static void domain_destroy_mon_state(struct rdt_domain *d)
+static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
}
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);
/*
* If resctrl is mounted, remove all the
* per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d->id);
+ rmdir_mondata_subdir_allrdtgrp(r, d);
- if (is_mbm_enabled())
+ if (resctrl_is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
- if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
+ if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
/*
* When a package is going down, forcefully
* decrement rmid->ebusy. There is no way to know
@@ -3969,21 +4165,33 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
domain_destroy_mon_state(d);
-out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+/**
+ * domain_setup_mon_state() - Initialise domain monitoring structures.
+ * @r: The resource for the newly online domain.
+ * @d: The newly online domain.
+ *
+ * Allocate monitor resources that belong to this domain.
+ * Called when the first CPU of a domain comes online, regardless of whether
+ * the filesystem is mounted.
+ * During boot this may be called before global allocations have been made by
+ * resctrl_mon_resource_init().
+ *
+ * Returns 0 for success, or -ENOMEM.
+ */
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
- if (is_llc_occupancy_enabled()) {
+ if (resctrl_arch_is_llc_occupancy_enabled()) {
d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
- if (is_mbm_total_enabled()) {
+ if (resctrl_arch_is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
@@ -3991,7 +4199,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return -ENOMEM;
}
}
- if (is_mbm_local_enabled()) {
+ if (resctrl_arch_is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
@@ -4004,7 +4212,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
int err = 0;
@@ -4013,23 +4221,30 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
err = mba_sc_domain_allocate(r, d);
- goto out_unlock;
}
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ int err;
+
+ mutex_lock(&rdtgroup_mutex);
err = domain_setup_mon_state(r, d);
if (err)
goto out_unlock;
- if (is_mbm_enabled()) {
+ if (resctrl_is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
- if (is_llc_occupancy_enabled())
+ if (resctrl_arch_is_llc_occupancy_enabled())
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
/*
@@ -4065,11 +4280,27 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
}
}
+static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu,
+ struct rdt_resource *r)
+{
+ struct rdt_mon_domain *d;
+ /*
+ * Return the entry on r->mon_domains whose hdr.cpu_mask contains
+ * @cpu, or NULL when no monitor domain of @r contains it.
+ */
+ lockdep_assert_cpus_held(); /* presumably so the list walk cannot race with CPU hotplug */
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL; /* @cpu is not in any monitor domain of @r */
+}
+
void resctrl_offline_cpu(unsigned int cpu)
{
- struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
- struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
@@ -4082,14 +4313,14 @@ void resctrl_offline_cpu(unsigned int cpu)
if (!l3->mon_capable)
goto out_unlock;
- d = get_domain_from_cpu(cpu, l3);
+ d = get_mon_domain_from_cpu(cpu, l3);
if (d) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
mbm_setup_overflow_handler(d, 0, cpu);
}
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(d)) {
+ if (resctrl_arch_is_llc_occupancy_enabled() &&
+ cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
cancel_delayed_work(&d->cqm_limbo);
cqm_setup_limbo_handler(d, 0, cpu);
}
@@ -4100,14 +4331,14 @@ out_unlock:
}
/*
- * rdtgroup_init - rdtgroup initialization
+ * resctrl_init - resctrl filesystem initialization
*
* Setup resctrl file system including set up root, create mount point,
- * register rdtgroup filesystem, and initialize files under root directory.
+ * register resctrl filesystem, and initialize files under root directory.
*
* Return: 0 on success or -errno
*/
-int __init rdtgroup_init(void)
+int __init resctrl_init(void)
{
int ret = 0;
@@ -4116,10 +4347,18 @@ int __init rdtgroup_init(void)
rdtgroup_setup_default();
- ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+ thread_throttle_mode_init();
+
+ ret = resctrl_mon_resource_init();
if (ret)
return ret;
+ ret = sysfs_create_mount_point(fs_kobj, "resctrl");
+ if (ret) {
+ resctrl_mon_resource_exit();
+ return ret;
+ }
+
ret = register_filesystem(&rdt_fs_type);
if (ret)
goto cleanup_mountpoint;
@@ -4151,13 +4390,16 @@ int __init rdtgroup_init(void)
cleanup_mountpoint:
sysfs_remove_mount_point(fs_kobj, "resctrl");
+ resctrl_mon_resource_exit();
return ret;
}
-void __exit rdtgroup_exit(void)
+void __exit resctrl_exit(void)
{
debugfs_remove_recursive(debugfs_resctrl);
unregister_filesystem(&rdt_fs_type);
sysfs_remove_mount_point(fs_kobj, "resctrl");
+
+ resctrl_mon_resource_exit();
}