summaryrefslogtreecommitdiff
path: root/drivers/base/arch_topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/base/arch_topology.c')
-rw-r--r--drivers/base/arch_topology.c176
1 files changed, 119 insertions, 57 deletions
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 3ebe77566788..84ec92bff642 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -11,6 +11,7 @@
#include <linux/cleanup.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
@@ -28,7 +29,7 @@
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
-DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
+DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0;
EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
static bool supports_scale_freq_counters(const struct cpumask *cpus)
@@ -153,14 +154,6 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
per_cpu(arch_freq_scale, i) = scale;
}
-DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
-{
- per_cpu(cpu_scale, cpu) = capacity;
-}
-
DEFINE_PER_CPU(unsigned long, hw_pressure);
/**
@@ -206,53 +199,9 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
}
EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
-static ssize_t cpu_capacity_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
-}
-
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
-static DEVICE_ATTR_RO(cpu_capacity);
-
-static int cpu_capacity_sysctl_add(unsigned int cpu)
-{
- struct device *cpu_dev = get_cpu_device(cpu);
-
- if (!cpu_dev)
- return -ENOENT;
-
- device_create_file(cpu_dev, &dev_attr_cpu_capacity);
-
- return 0;
-}
-
-static int cpu_capacity_sysctl_remove(unsigned int cpu)
-{
- struct device *cpu_dev = get_cpu_device(cpu);
-
- if (!cpu_dev)
- return -ENOENT;
-
- device_remove_file(cpu_dev, &dev_attr_cpu_capacity);
-
- return 0;
-}
-
-static int register_cpu_capacity_sysctl(void)
-{
- cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity",
- cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove);
-
- return 0;
-}
-subsys_initcall(register_cpu_capacity_sysctl);
-
static int update_topology;
int topology_update_cpu_topology(void)
@@ -293,13 +242,15 @@ void topology_normalize_cpu_scale(void)
capacity_scale = 1;
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
+ capacity = raw_capacity[cpu] *
+ (per_cpu(capacity_freq_ref, cpu) ?: 1);
capacity_scale = max(capacity, capacity_scale);
}
pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
+ capacity = raw_capacity[cpu] *
+ (per_cpu(capacity_freq_ref, cpu) ?: 1);
capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
capacity_scale);
topology_set_cpu_scale(cpu, capacity);
@@ -341,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
* frequency (by keeping the initial capacity_freq_ref value).
*/
cpu_clk = of_clk_get(cpu_node, 0);
- if (!PTR_ERR_OR_ZERO(cpu_clk)) {
+ if (!IS_ERR_OR_NULL(cpu_clk)) {
per_cpu(capacity_freq_ref, cpu) =
clk_get_rate(cpu_clk) / HZ_PER_KHZ;
clk_put(cpu_clk);
@@ -506,6 +457,10 @@ core_initcall(free_raw_capacity);
#endif
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+
+/* Used to enable the SMT control */
+static unsigned int max_smt_thread_num = 1;
+
/*
* This function returns the logic cpu number of the node.
* There are basically three kinds of return values:
@@ -565,6 +520,8 @@ static int __init parse_core(struct device_node *core, int package_id,
i++;
} while (1);
+ max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i);
+
cpu = get_cpu_for_node(core);
if (cpu >= 0) {
if (!leaf) {
@@ -677,6 +634,17 @@ static int __init parse_socket(struct device_node *socket)
if (!has_socket)
ret = parse_cluster(socket, 0, -1, 0);
+ /*
+ * Reset the max_smt_thread_num to 1 on failure. Since on failure
+ * we need to notify the framework the SMT is not supported, but
+ * max_smt_thread_num can be initialized to the SMT thread number
+ * of the cores which are successfully parsed.
+ */
+ if (ret)
+ max_smt_thread_num = 1;
+
+ cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+
return ret;
}
@@ -855,12 +823,106 @@ void remove_cpu_topology(unsigned int cpu)
clear_cpu_topology(cpu);
}
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+struct cpu_smt_info {
+ unsigned int thread_num;
+ int core_id;
+};
+
+static bool __init acpi_cpu_is_threaded(int cpu)
+{
+ int is_threaded = acpi_pptt_cpu_is_thread(cpu);
+
+ /*
+ * if the PPTT doesn't have thread information, check for architecture
+ * specific fallback if available
+ */
+ if (is_threaded < 0)
+ is_threaded = arch_cpu_is_threaded();
+
+ return !!is_threaded;
+}
+
+/*
+ * Propagate the topology information of the processor_topology_node tree to the
+ * cpu_topology array.
+ */
__weak int __init parse_acpi_topology(void)
{
+ unsigned int max_smt_thread_num = 1;
+ struct cpu_smt_info *entry;
+ struct xarray hetero_cpu;
+ unsigned long hetero_id;
+ int cpu, topology_id;
+
+ if (acpi_disabled)
+ return 0;
+
+ xa_init(&hetero_cpu);
+
+ for_each_possible_cpu(cpu) {
+ topology_id = find_acpi_cpu_topology(cpu, 0);
+ if (topology_id < 0)
+ return topology_id;
+
+ if (acpi_cpu_is_threaded(cpu)) {
+ cpu_topology[cpu].thread_id = topology_id;
+ topology_id = find_acpi_cpu_topology(cpu, 1);
+ cpu_topology[cpu].core_id = topology_id;
+
+ /*
+ * In the PPTT, CPUs below a node with the 'identical
+ * implementation' flag have the same number of threads.
+ * Count the number of threads for only one CPU (i.e.
+ * one core_id) among those with the same hetero_id.
+ * See the comment of find_acpi_cpu_topology_hetero_id()
+ * for more details.
+ *
+ * One entry is created for each node having:
+ * - the 'identical implementation' flag
+ * - its parent not having the flag
+ */
+ hetero_id = find_acpi_cpu_topology_hetero_id(cpu);
+ entry = xa_load(&hetero_cpu, hetero_id);
+ if (!entry) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ WARN_ON_ONCE(!entry);
+
+ if (entry) {
+ entry->core_id = topology_id;
+ entry->thread_num = 1;
+ xa_store(&hetero_cpu, hetero_id,
+ entry, GFP_KERNEL);
+ }
+ } else if (entry->core_id == topology_id) {
+ entry->thread_num++;
+ }
+ } else {
+ cpu_topology[cpu].thread_id = -1;
+ cpu_topology[cpu].core_id = topology_id;
+ }
+ topology_id = find_acpi_cpu_topology_cluster(cpu);
+ cpu_topology[cpu].cluster_id = topology_id;
+ topology_id = find_acpi_cpu_topology_package(cpu);
+ cpu_topology[cpu].package_id = topology_id;
+ }
+
+ /*
+ * This is a short loop since the number of XArray elements is the
+ * number of heterogeneous CPU clusters. On a homogeneous system
+ * there's only one entry in the XArray.
+ */
+ xa_for_each(&hetero_cpu, hetero_id, entry) {
+ max_smt_thread_num = max(max_smt_thread_num, entry->thread_num);
+ xa_erase(&hetero_cpu, hetero_id);
+ kfree(entry);
+ }
+
+ cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+ xa_destroy(&hetero_cpu);
return 0;
}
-#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
int cpu, ret;