summaryrefslogtreecommitdiff
path: root/drivers/thermal/intel/intel_hfi.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/thermal/intel/intel_hfi.c')
-rw-r--r--drivers/thermal/intel/intel_hfi.c251
1 files changed, 204 insertions, 47 deletions
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 6e604bda2b93..8a2f441cd2ec 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -24,6 +24,7 @@
#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/cpumask.h>
+#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -34,16 +35,19 @@
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
#include <asm/msr.h>
-#include "../thermal_core.h"
#include "intel_hfi.h"
#include "thermal_interrupt.h"
+#include "../thermal_netlink.h"
+
/* Hardware Feedback Interface MSR configuration bits */
#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
@@ -155,14 +159,15 @@ struct hfi_cpu_info {
static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
static int max_hfi_instances;
+static int hfi_clients_nr;
static struct hfi_instance *hfi_instances;
static struct hfi_features hfi_features;
static DEFINE_MUTEX(hfi_instance_lock);
static struct workqueue_struct *hfi_updates_wq;
-#define HFI_UPDATE_INTERVAL HZ
-#define HFI_MAX_THERM_NOTIFY_COUNT 16
+#define HFI_UPDATE_DELAY_MS 100
+#define HFI_THERMNL_CAPS_PER_EVENT 64
static void get_hfi_caps(struct hfi_instance *hfi_instance,
struct thermal_genl_cpu_caps *cpu_caps)
@@ -213,14 +218,14 @@ static void update_capabilities(struct hfi_instance *hfi_instance)
get_hfi_caps(hfi_instance, cpu_caps);
- if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+ if (cpu_count < HFI_THERMNL_CAPS_PER_EVENT)
goto last_cmd;
- /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+ /* Process complete chunks of HFI_THERMNL_CAPS_PER_EVENT capabilities. */
for (i = 0;
- (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
- i += HFI_MAX_THERM_NOTIFY_COUNT)
- thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+ (i + HFI_THERMNL_CAPS_PER_EVENT) <= cpu_count;
+ i += HFI_THERMNL_CAPS_PER_EVENT)
+ thermal_genl_cpu_capability_event(HFI_THERMNL_CAPS_PER_EVENT,
&cpu_caps[i]);
cpu_count = cpu_count - i;
@@ -279,7 +284,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
if (!raw_spin_trylock(&hfi_instance->event_lock))
return;
- rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
+ rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr);
hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
if (!hfi) {
raw_spin_unlock(&hfi_instance->event_lock);
@@ -317,7 +322,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
raw_spin_unlock(&hfi_instance->event_lock);
queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
- HFI_UPDATE_INTERVAL);
+ msecs_to_jiffies(HFI_UPDATE_DELAY_MS));
}
static void init_hfi_cpu_index(struct hfi_cpu_info *info)
@@ -346,14 +351,60 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance)
hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
}
+/* Caller must hold hfi_instance_lock. */
+static void hfi_enable(void)
+{
+ u64 msr_val;
+
+ rdmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+}
+
+static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
+{
+ phys_addr_t hw_table_pa;
+ u64 msr_val;
+
+ hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+ wrmsrq(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+}
+
+/* Caller must hold hfi_instance_lock. */
+static void hfi_disable(void)
+{
+ u64 msr_val;
+ int i;
+
+ rdmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrq(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+ /*
+ * Wait for hardware to acknowledge the disabling of HFI. Some
+ * processors may not do it. Wait for ~2ms. This is a reasonable
+ * time for hardware to complete any pending actions on the HFI
+ * memory.
+ */
+ for (i = 0; i < 2000; i++) {
+ rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
+ break;
+
+ udelay(1);
+ cpu_relax();
+ }
+}
+
/**
* intel_hfi_online() - Enable HFI on @cpu
* @cpu: CPU in which the HFI will be enabled
*
- * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
- * level. The first CPU in the die/package to come online does the full HFI
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the package
+ * level. The first CPU in the package to come online does the full HFI
* initialization. Subsequent CPUs will just link themselves to the HFI
- * instance of their die/package.
+ * instance of their package.
*
* This function is called before enabling the thermal vector in the local APIC
* in order to ensure that @cpu has an associated HFI instance when it receives
@@ -363,53 +414,52 @@ void intel_hfi_online(unsigned int cpu)
{
struct hfi_instance *hfi_instance;
struct hfi_cpu_info *info;
- phys_addr_t hw_table_pa;
- u64 msr_val;
- u16 die_id;
+ u16 pkg_id;
/* Nothing to do if hfi_instances are missing. */
if (!hfi_instances)
return;
/*
- * Link @cpu to the HFI instance of its package/die. It does not
+ * Link @cpu to the HFI instance of its package. It does not
* matter whether the instance has been initialized.
*/
info = &per_cpu(hfi_cpu_info, cpu);
- die_id = topology_logical_die_id(cpu);
+ pkg_id = topology_logical_package_id(cpu);
hfi_instance = info->hfi_instance;
if (!hfi_instance) {
- if (die_id >= max_hfi_instances)
+ if (pkg_id >= max_hfi_instances)
return;
- hfi_instance = &hfi_instances[die_id];
+ hfi_instance = &hfi_instances[pkg_id];
info->hfi_instance = hfi_instance;
}
init_hfi_cpu_index(info);
/*
- * Now check if the HFI instance of the package/die of @cpu has been
+ * Now check if the HFI instance of the package of @cpu has been
* initialized (by checking its header). In such case, all we have to
- * do is to add @cpu to this instance's cpumask.
+ * do is to add @cpu to this instance's cpumask and enable the instance
+ * if needed.
*/
mutex_lock(&hfi_instance_lock);
- if (hfi_instance->hdr) {
- cpumask_set_cpu(cpu, hfi_instance->cpus);
- goto unlock;
- }
+ if (hfi_instance->hdr)
+ goto enable;
/*
* Hardware is programmed with the physical address of the first page
* frame of the table. Hence, the allocated memory must be page-aligned.
+ *
+ * Some processors do not forget the initial address of the HFI table
+ * even after having been reprogrammed. Keep using the same pages. Do
+ * not free them.
*/
hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
GFP_KERNEL | __GFP_ZERO);
if (!hfi_instance->hw_table)
goto unlock;
- hw_table_pa = virt_to_phys(hfi_instance->hw_table);
-
/*
* Allocate memory to keep a local copy of the table that
* hardware generates.
@@ -419,31 +469,23 @@ void intel_hfi_online(unsigned int cpu)
if (!hfi_instance->local_table)
goto free_hw_table;
- /*
- * Program the address of the feedback table of this die/package. On
- * some processors, hardware remembers the old address of the HFI table
- * even after having been reprogrammed and re-enabled. Thus, do not free
- * the pages allocated for the table or reprogram the hardware with a
- * new base address. Namely, program the hardware only once.
- */
- msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
-
init_hfi_instance(hfi_instance);
INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
raw_spin_lock_init(&hfi_instance->table_lock);
raw_spin_lock_init(&hfi_instance->event_lock);
+enable:
cpumask_set_cpu(cpu, hfi_instance->cpus);
/*
- * Enable the hardware feedback interface and never disable it. See
- * comment on programming the address of the table.
+ * Enable this HFI instance if this is its first online CPU and
+ * there are user-space clients of thermal events.
*/
- rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
- msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_clients_nr > 0) {
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+ }
unlock:
mutex_unlock(&hfi_instance_lock);
@@ -462,7 +504,7 @@ free_hw_table:
*
* On some processors, hardware remembers previous programming settings even
* after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
- * die/package of @cpu are offline. See note in intel_hfi_online().
+ * package of @cpu are offline. See note in intel_hfi_online().
*/
void intel_hfi_offline(unsigned int cpu)
{
@@ -483,6 +525,10 @@ void intel_hfi_offline(unsigned int cpu)
mutex_lock(&hfi_instance_lock);
cpumask_clear_cpu(cpu, hfi_instance->cpus);
+
+ if (!cpumask_weight(hfi_instance->cpus))
+ hfi_disable();
+
mutex_unlock(&hfi_instance_lock);
}
@@ -531,6 +577,99 @@ static __init int hfi_parse_features(void)
return 0;
}
+/*
+ * If concurrency is not prevented by other means, the HFI enable/disable
+ * routines must be called under hfi_instance_lock."
+ */
+static void hfi_enable_instance(void *ptr)
+{
+ hfi_set_hw_table(ptr);
+ hfi_enable();
+}
+
+static void hfi_disable_instance(void *ptr)
+{
+ hfi_disable();
+}
+
+static void hfi_syscore_resume(void *data)
+{
+ /* This code runs only on the boot CPU. */
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* No locking needed. There is no concurrency with CPU online. */
+ if (hfi_clients_nr > 0)
+ hfi_enable_instance(hfi_instance);
+}
+
+static int hfi_syscore_suspend(void *data)
+{
+ /* No locking needed. There is no concurrency with CPU offline. */
+ hfi_disable();
+
+ return 0;
+}
+
+static const struct syscore_ops hfi_pm_ops = {
+ .resume = hfi_syscore_resume,
+ .suspend = hfi_syscore_suspend,
+};
+
+static struct syscore hfi_pm = {
+ .ops = &hfi_pm_ops,
+};
+
+static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
+ void *_notify)
+{
+ struct thermal_genl_notify *notify = _notify;
+ struct hfi_instance *hfi_instance;
+ smp_call_func_t func = NULL;
+ unsigned int cpu;
+ int i;
+
+ if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
+ return NOTIFY_DONE;
+
+ if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
+ return NOTIFY_DONE;
+
+ mutex_lock(&hfi_instance_lock);
+
+ switch (state) {
+ case THERMAL_NOTIFY_BIND:
+ if (++hfi_clients_nr == 1)
+ func = hfi_enable_instance;
+ break;
+ case THERMAL_NOTIFY_UNBIND:
+ if (--hfi_clients_nr == 0)
+ func = hfi_disable_instance;
+ break;
+ }
+
+ if (!func)
+ goto out;
+
+ for (i = 0; i < max_hfi_instances; i++) {
+ hfi_instance = &hfi_instances[i];
+ if (cpumask_empty(hfi_instance->cpus))
+ continue;
+
+ cpu = cpumask_any(hfi_instance->cpus);
+ smp_call_function_single(cpu, func, hfi_instance, true);
+ }
+
+out:
+ mutex_unlock(&hfi_instance_lock);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block hfi_thermal_nb = {
+ .notifier_call = hfi_thermal_notify,
+};
+
void __init intel_hfi_init(void)
{
struct hfi_instance *hfi_instance;
@@ -539,9 +678,13 @@ void __init intel_hfi_init(void)
if (hfi_parse_features())
return;
- /* There is one HFI instance per die/package. */
- max_hfi_instances = topology_max_packages() *
- topology_max_die_per_package();
+ /*
+ * Note: HFI resources are managed at the physical package scope.
+ * There could be platforms that enumerate packages as Linux dies.
+ * Special handling would be needed if this happens on an HFI-capable
+ * platform.
+ */
+ max_hfi_instances = topology_max_packages();
/*
* This allocation may fail. CPU hotplug callbacks must check
@@ -562,8 +705,22 @@ void __init intel_hfi_init(void)
if (!hfi_updates_wq)
goto err_nomem;
+ /*
+ * Both thermal core and Intel HFI can not be build as modules.
+ * As kernel build-in drivers they are initialized before user-space
+ * starts, hence we can not miss BIND/UNBIND events when applications
+ * add/remove thermal multicast group to/from a netlink socket.
+ */
+ if (thermal_genl_register_notifier(&hfi_thermal_nb))
+ goto err_nl_notif;
+
+ register_syscore(&hfi_pm);
+
return;
+err_nl_notif:
+ destroy_workqueue(hfi_updates_wq);
+
err_nomem:
for (j = 0; j < i; ++j) {
hfi_instance = &hfi_instances[j];