Diffstat (limited to 'drivers/cpuidle/cpuidle.c')
 drivers/cpuidle/cpuidle.c | 128 ++++++++++++++++++++++++++++++++------------
 1 file changed, 87 insertions(+), 41 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index ef2ea1b12cd8..c7876e9e024f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,11 +8,13 @@
* This code is licenced under the GPL.
*/
+#include "linux/percpu-defs.h"
#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -23,6 +25,7 @@
#include <linux/suspend.h>
#include <linux/tick.h>
#include <linux/mmu_context.h>
+#include <linux/context_tracking.h>
#include <trace/events/power.h>
#include "cpuidle.h"
@@ -66,11 +69,15 @@ int cpuidle_play_dead(void)
if (!drv)
return -ENODEV;
- /* Find lowest-power state that supports long-term idle */
- for (i = drv->state_count - 1; i >= 0; i--)
+ for (i = drv->state_count - 1; i >= 0; i--) {
if (drv->states[i].enter_dead)
- return drv->states[i].enter_dead(dev, i);
+ drv->states[i].enter_dead(dev, i);
+ }
+ /*
+ * If ->enter_dead() is successful, it will never return, so reaching
+ * here means that all of them failed above or were not present.
+ */
return -ENODEV;
}
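For context, an ->enter_dead() callback is expected never to return on success, which is why the rewritten loop can try every state in turn and simply fall through to -ENODEV. A minimal sketch of such a callback, with hypothetical names and a generic parking loop standing in for the arch-specific mechanism:

    static int example_enter_dead(struct cpuidle_device *dev, int index)
    {
            /*
             * Hypothetical: park the CPU permanently. A real callback
             * would use an arch-specific mechanism (ACPI C-state entry,
             * PSCI CPU_OFF, ...) and only return if that mechanism failed.
             */
            for (;;)
                    cpu_relax();
    }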
@@ -134,13 +141,15 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
}
#ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int index)
+static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
{
- ktime_t time_start, time_end;
struct cpuidle_state *target_state = &drv->states[index];
+ ktime_t time_start, time_end;
+
+ instrumentation_begin();
- time_start = ns_to_ktime(local_clock());
+ time_start = ns_to_ktime(local_clock_noinstr());
tick_freeze();
/*
@@ -149,40 +158,48 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
* suspended is generally unsafe.
*/
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_exit();
+ raw_local_irq_disable();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
tick_unfreeze();
start_critical_timings();
- time_end = ns_to_ktime(local_clock());
+ time_end = ns_to_ktime(local_clock_noinstr());
dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
dev->states_usage[index].s2idle_usage++;
+ instrumentation_end();
}
/**
* cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
* @drv: cpuidle driver for the given CPU.
* @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
*
* If there are states with the ->enter_s2idle callback, find the deepest of
* them and enter it with frozen tick.
*/
-int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ u64 latency_limit_ns)
{
int index;
/*
- * Find the deepest state with ->enter_s2idle present, which guarantees
- * that interrupts won't be enabled when it exits and allows the tick to
- * be frozen safely.
+ * Find the deepest state with ->enter_s2idle present that meets the
+ * specified latency limit, which guarantees that interrupts won't be
+ * enabled when it exits and allows the tick to be frozen safely.
*/
- index = find_deepest_state(drv, dev, U64_MAX, 0, true);
+ index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
if (index > 0) {
enter_s2idle_proper(drv, dev, index);
local_irq_enable();
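The bracketing above is the core of the change: ct_cpuidle_enter() replaces rcu_idle_enter(), so between it and ct_cpuidle_exit() only noinstr/__cpuidle code may run, and the extra instrumentation_begin()/instrumentation_end() pair annotates away the indirect ->enter_s2idle() call. A callback for a state without CPUIDLE_FLAG_RCU_IDLE could look like this sketch (hypothetical names; raw_safe_halt() stands in for a real arch idle primitive):

    static __cpuidle int example_enter_s2idle(struct cpuidle_device *dev,
                                              struct cpuidle_driver *drv,
                                              int index)
    {
            /*
             * Runs inside the ct_cpuidle_enter()/ct_cpuidle_exit() window,
             * so this function and everything it calls must be
             * noinstr/__cpuidle.
             */
            raw_safe_halt();                /* illustrative idle primitive */
            raw_local_irq_disable();        /* must return with IRQs off */
            return index;
    }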
@@ -197,8 +214,9 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* @drv: cpuidle driver for this cpu
* @index: index into the states table in @drv of the state to enter
*/
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
- int index)
+noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int entered_state;
@@ -206,6 +224,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
ktime_t time_start, time_end;
+ instrumentation_begin();
+
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
@@ -214,44 +234,59 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
if (broadcast && tick_broadcast_enter()) {
index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
CPUIDLE_FLAG_TIMER_STOP, false);
- if (index < 0) {
- default_idle_call();
- return -EBUSY;
- }
+
target_state = &drv->states[index];
broadcast = false;
}
if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
- leave_mm(dev->cpu);
+ leave_mm();
/* Take note of the planned idle state. */
sched_idle_set_state(target_state);
trace_cpu_idle(index, dev->cpu);
- time_start = ns_to_ktime(local_clock());
+ time_start = ns_to_ktime(local_clock_noinstr());
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
+
+ /*
+ * NOTE!!
+ *
+ * For cpuidle_state::enter() methods that do *NOT* set
+ * CPUIDLE_FLAG_RCU_IDLE, RCU will be disabled here and these functions
+ * must be marked either noinstr or __cpuidle.
+ *
+ * For cpuidle_state::enter() methods that *DO* set
+ * CPUIDLE_FLAG_RCU_IDLE, this isn't required, but they must mark the
+ * function calling ct_cpuidle_enter() as noinstr/__cpuidle and all
+ * functions called within the RCU-idle region.
+ */
entered_state = target_state->enter(dev, drv, index);
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_exit();
+
+ if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
+ raw_local_irq_disable();
+
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
start_critical_timings();
sched_clock_idle_wakeup_event();
- time_end = ns_to_ktime(local_clock());
+ time_end = ns_to_ktime(local_clock_noinstr());
trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
/* The cpu is no longer idle or about to enter idle. */
sched_idle_set_state(NULL);
- if (broadcast) {
- if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
-
+ if (broadcast)
tick_broadcast_exit();
- }
if (!cpuidle_state_is_coupled(drv, index))
local_irq_enable();
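The second case in the NOTE above, a state that sets CPUIDLE_FLAG_RCU_IDLE, bypasses the core's ct_cpuidle_enter()/ct_cpuidle_exit() calls and makes the ->enter() method responsible for the RCU-idle window itself; a sketch under those assumptions (hypothetical names, raw_safe_halt() again standing in for the arch primitive):

    static __cpuidle int example_rcu_idle_enter(struct cpuidle_device *dev,
                                                struct cpuidle_driver *drv,
                                                int index)
    {
            /*
             * CPUIDLE_FLAG_RCU_IDLE: the callback opens and closes the
             * RCU-idle section itself; everything called between
             * ct_cpuidle_enter() and ct_cpuidle_exit() must likewise be
             * noinstr/__cpuidle.
             */
            ct_cpuidle_enter();
            raw_safe_halt();                /* illustrative idle primitive */
            raw_local_irq_disable();        /* satisfy the leaked-IRQ check */
            ct_cpuidle_exit();

            return index;
    }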
@@ -278,6 +313,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
/* Shallower states are enabled, so update. */
dev->states_usage[entered_state].above++;
+ trace_cpu_idle_miss(dev->cpu, entered_state, false);
break;
}
} else if (diff > delay) {
@@ -289,8 +325,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* Update if a deeper state would have been a
* better match for the observed idle duration.
*/
- if (diff - delay >= drv->states[i].target_residency_ns)
+ if (diff - delay >= drv->states[i].target_residency_ns) {
dev->states_usage[entered_state].below++;
+ trace_cpu_idle_miss(dev->cpu, entered_state, true);
+ }
break;
}
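As a worked example of the accounting above: with a measured idle time diff of 50 us, an exit latency delay of 5 us for the entered state, and a deeper state whose target residency is 40 us, diff - delay = 45 us >= 40 us, so the deeper state would have been the better match; the below counter is incremented and the new cpu_idle_miss tracepoint reports below == true.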
@@ -300,6 +338,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
dev->states_usage[index].rejected++;
}
+ instrumentation_end();
+
return entered_state;
}
@@ -372,7 +412,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
* Min polling interval of 10usec is a guess. It is assuming that
* for most users, the time for a single ping-pong workload like
* perf bench pipe would generally complete within 10usec but
- * this is hardware dependant. Actual time can be estimated with
+ * this is hardware dependent. Actual time can be estimated with
*
* perf bench sched pipe -l 10000
*
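To make that estimate concrete: perf bench sched pipe -l 10000 performs 10000 round trips, so a total runtime of roughly 100 ms works out to about 10 usec per ping-pong, which is where the guessed minimum polling interval comes from; faster hardware would yield a proportionally smaller figure.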
@@ -389,7 +429,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
* @dev: the cpuidle device
*
*/
-u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+__cpuidle u64 cpuidle_poll_time(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
int i;
@@ -597,8 +637,14 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ unsigned int cpu = dev->cpu;
int i, ret;
+ if (per_cpu(cpuidle_devices, cpu)) {
+ pr_info("CPU%d: cpuidle device already registered\n", cpu);
+ return -EEXIST;
+ }
+
if (!try_module_get(drv->owner))
return -EINVAL;
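With the new guard, registering a second device for the same CPU now fails with -EEXIST instead of silently overwriting the per-CPU pointer. A hypothetical caller, assuming the error code propagates out through the public registration path:

    /* Sketch only: handle a duplicate cpuidle device registration. */
    static int example_register(struct cpuidle_device *dev)
    {
            int ret = cpuidle_register_device(dev);

            if (ret == -EEXIST)
                    pr_warn("CPU%d: cpuidle device already registered\n",
                            dev->cpu);
            return ret;
    }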
@@ -610,7 +656,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER;
}
- per_cpu(cpuidle_devices, dev->cpu) = dev;
+ per_cpu(cpuidle_devices, cpu) = dev;
list_add(&dev->device_list, &cpuidle_detected_devices);
ret = cpuidle_coupled_register_device(dev);
@@ -771,7 +817,7 @@ static int __init cpuidle_init(void)
if (cpuidle_disabled())
return -ENODEV;
- return cpuidle_add_interface(cpu_subsys.dev_root);
+ return cpuidle_add_interface();
}
module_param(off, int, 0444);