Diffstat (limited to 'drivers/cpuidle/cpuidle.c')
 -rw-r--r--   drivers/cpuidle/cpuidle.c   128
 1 file changed, 87 insertions(+), 41 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index ef2ea1b12cd8..c7876e9e024f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,11 +8,13 @@
  * This code is licenced under the GPL.
  */
 
+#include "linux/percpu-defs.h"
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
 #include <linux/notifier.h>
 #include <linux/pm_qos.h>
 #include <linux/cpu.h>
@@ -23,6 +25,7 @@
 #include <linux/suspend.h>
 #include <linux/tick.h>
 #include <linux/mmu_context.h>
+#include <linux/context_tracking.h>
 #include <trace/events/power.h>
 
 #include "cpuidle.h"
@@ -66,11 +69,15 @@ int cpuidle_play_dead(void)
 
        if (!drv)
                return -ENODEV;
 
-       /* Find lowest-power state that supports long-term idle */
-       for (i = drv->state_count - 1; i >= 0; i--)
+       for (i = drv->state_count - 1; i >= 0; i--) {
                if (drv->states[i].enter_dead)
-                       return drv->states[i].enter_dead(dev, i);
+                       drv->states[i].enter_dead(dev, i);
+       }
 
+       /*
+        * If :enter_dead() is successful, it will never return, so reaching
+        * here means that all of them failed above or were not present.
+        */
        return -ENODEV;
 }
@@ -134,13 +141,15 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 }
 
 #ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
-                               struct cpuidle_device *dev, int index)
+static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
+                                        struct cpuidle_device *dev, int index)
 {
-       ktime_t time_start, time_end;
        struct cpuidle_state *target_state = &drv->states[index];
+       ktime_t time_start, time_end;
+
+       instrumentation_begin();
 
-       time_start = ns_to_ktime(local_clock());
+       time_start = ns_to_ktime(local_clock_noinstr());
 
        tick_freeze();
        /*
@@ -149,40 +158,48 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
         * suspended is generally unsafe.
         */
        stop_critical_timings();
-       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               rcu_idle_enter();
+       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+               ct_cpuidle_enter();
+               /* Annotate away the indirect call */
+               instrumentation_begin();
+       }
        target_state->enter_s2idle(dev, drv, index);
        if (WARN_ON_ONCE(!irqs_disabled()))
-               local_irq_disable();
-       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               rcu_idle_exit();
+               raw_local_irq_disable();
+       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+               instrumentation_end();
+               ct_cpuidle_exit();
+       }
        tick_unfreeze();
        start_critical_timings();
 
-       time_end = ns_to_ktime(local_clock());
+       time_end = ns_to_ktime(local_clock_noinstr());
 
        dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
        dev->states_usage[index].s2idle_usage++;
+       instrumentation_end();
 }
 
 /**
  * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
  * @drv: cpuidle driver for the given CPU.
  * @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
  *
  * If there are states with the ->enter_s2idle callback, find the deepest of
  * them and enter it with frozen tick.
  */
-int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                        u64 latency_limit_ns)
 {
        int index;
 
        /*
-        * Find the deepest state with ->enter_s2idle present, which guarantees
-        * that interrupts won't be enabled when it exits and allows the tick to
-        * be frozen safely.
+        * Find the deepest state with ->enter_s2idle present that meets the
+        * specified latency limit, which guarantees that interrupts won't be
+        * enabled when it exits and allows the tick to be frozen safely.
         */
-       index = find_deepest_state(drv, dev, U64_MAX, 0, true);
+       index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
        if (index > 0) {
                enter_s2idle_proper(drv, dev, index);
                local_irq_enable();
@@ -197,8 +214,9 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
  * @drv: cpuidle driver for this cpu
  * @index: index into the states table in @drv of the state to enter
  */
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
-                       int index)
+noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
+                                struct cpuidle_driver *drv,
+                                int index)
 {
        int entered_state;
 
@@ -206,6 +224,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
        ktime_t time_start, time_end;
 
+       instrumentation_begin();
+
        /*
         * Tell the time framework to switch to a broadcast timer because our
         * local timer will be shut down. If a local timer is used from another
@@ -214,44 +234,59 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        if (broadcast && tick_broadcast_enter()) {
                index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
                                           CPUIDLE_FLAG_TIMER_STOP, false);
-               if (index < 0) {
-                       default_idle_call();
-                       return -EBUSY;
-               }
+
                target_state = &drv->states[index];
                broadcast = false;
        }
 
        if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
-               leave_mm(dev->cpu);
+               leave_mm();
 
        /* Take note of the planned idle state. */
        sched_idle_set_state(target_state);
 
        trace_cpu_idle(index, dev->cpu);
-       time_start = ns_to_ktime(local_clock());
+       time_start = ns_to_ktime(local_clock_noinstr());
 
        stop_critical_timings();
-       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               rcu_idle_enter();
+       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+               ct_cpuidle_enter();
+               /* Annotate away the indirect call */
+               instrumentation_begin();
+       }
+
+       /*
+        * NOTE!!
+        *
+        * For cpuidle_state::enter() methods that do *NOT* set
+        * CPUIDLE_FLAG_RCU_IDLE RCU will be disabled here and these functions
+        * must be marked either noinstr or __cpuidle.
+        *
+        * For cpuidle_state::enter() methods that *DO* set
+        * CPUIDLE_FLAG_RCU_IDLE this isn't required, but they must mark the
+        * function calling ct_cpuidle_enter() as noinstr/__cpuidle and all
+        * functions called within the RCU-idle region.
+        */
        entered_state = target_state->enter(dev, drv, index);
-       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
-               rcu_idle_exit();
+
+       if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
+               raw_local_irq_disable();
+
+       if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+               instrumentation_end();
+               ct_cpuidle_exit();
+       }
        start_critical_timings();
 
        sched_clock_idle_wakeup_event();
-       time_end = ns_to_ktime(local_clock());
+       time_end = ns_to_ktime(local_clock_noinstr());
        trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
 
        /* The cpu is no longer idle or about to enter idle. */
        sched_idle_set_state(NULL);
 
-       if (broadcast) {
-               if (WARN_ON_ONCE(!irqs_disabled()))
-                       local_irq_disable();
-
+       if (broadcast)
                tick_broadcast_exit();
-       }
 
        if (!cpuidle_state_is_coupled(drv, index))
                local_irq_enable();
@@ -278,6 +313,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
                        /* Shallower states are enabled, so update. */
                        dev->states_usage[entered_state].above++;
+                       trace_cpu_idle_miss(dev->cpu, entered_state, false);
                        break;
                }
        } else if (diff > delay) {
@@ -289,8 +325,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
                         * Update if a deeper state would have been a
                         * better match for the observed idle duration.
                         */
-                       if (diff - delay >= drv->states[i].target_residency_ns)
+                       if (diff - delay >= drv->states[i].target_residency_ns) {
                                dev->states_usage[entered_state].below++;
+                               trace_cpu_idle_miss(dev->cpu, entered_state, true);
+                       }
 
                        break;
                }
@@ -300,6 +338,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
                dev->states_usage[index].rejected++;
        }
 
+       instrumentation_end();
+
        return entered_state;
 }
@@ -372,7 +412,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
  * Min polling interval of 10usec is a guess. It is assuming that
  * for most users, the time for a single ping-pong workload like
  * perf bench pipe would generally complete within 10usec but
- * this is hardware dependant. Actual time can be estimated with
+ * this is hardware dependent. Actual time can be estimated with
  *
  * perf bench sched pipe -l 10000
  *
@@ -389,7 +429,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
  * @dev: the cpuidle device
  *
  */
-u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+__cpuidle u64 cpuidle_poll_time(struct cpuidle_driver *drv,
                      struct cpuidle_device *dev)
 {
        int i;
@@ -597,8 +637,14 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
        struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+       unsigned int cpu = dev->cpu;
        int i, ret;
 
+       if (per_cpu(cpuidle_devices, cpu)) {
+               pr_info("CPU%d: cpuidle device already registered\n", cpu);
+               return -EEXIST;
+       }
+
        if (!try_module_get(drv->owner))
                return -EINVAL;
@@ -610,7 +656,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
                        dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER;
        }
 
-       per_cpu(cpuidle_devices, dev->cpu) = dev;
+       per_cpu(cpuidle_devices, cpu) = dev;
        list_add(&dev->device_list, &cpuidle_detected_devices);
 
        ret = cpuidle_coupled_register_device(dev);
@@ -771,7 +817,7 @@ static int __init cpuidle_init(void)
        if (cpuidle_disabled())
                return -ENODEV;
 
-       return cpuidle_add_interface(cpu_subsys.dev_root);
+       return cpuidle_add_interface();
 }
 
 module_param(off, int, 0444);
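
The NOTE added to cpuidle_enter_state() above spells out the contract this change places on drivers: since the core now calls ct_cpuidle_enter() itself, a cpuidle_state::enter() callback that does not set CPUIDLE_FLAG_RCU_IDLE runs with RCU not watching and instrumentation disabled, so it must be marked noinstr or __cpuidle, and it must return with interrupts still off or the new "%ps leaked IRQ state" warning fires. The sketch below shows what that convention looks like from the driver side; the driver name, state name and latency numbers are invented for illustration, only the __cpuidle marking and the enter() signature follow the comment above.

#include <linux/cpuidle.h>
#include <linux/module.h>

/*
 * Hypothetical enter() callback. cpuidle_enter_state() invokes it after
 * ct_cpuidle_enter(), i.e. inside the RCU-idle region with IRQs disabled,
 * so it is marked __cpuidle and must not re-enable instrumentation.
 */
static __cpuidle int example_enter_idle(struct cpuidle_device *dev,
                                        struct cpuidle_driver *drv, int index)
{
        /*
         * The architecture-specific low-power wait (a WFI/HLT-style
         * instruction) would go here. It has to come back with interrupts
         * disabled, otherwise the core's "leaked IRQ state" warning fires.
         */
        return index;
}

static struct cpuidle_driver example_idle_driver = {
        .name  = "example_idle",
        .owner = THIS_MODULE,
        .states[0] = {
                .name             = "EX1",
                .desc             = "hypothetical shallow idle state",
                .exit_latency     = 2,   /* us */
                .target_residency = 5,   /* us */
                .enter            = example_enter_idle, /* must be __cpuidle */
        },
        .state_count = 1,
};

Such a driver would typically be registered with cpuidle_register(&example_idle_driver, NULL). A state that does set CPUIDLE_FLAG_RCU_IDLE is left instrumentable at this point in the core, but its own code path then has to call ct_cpuidle_enter()/ct_cpuidle_exit() around the actual low-power entry, from a noinstr/__cpuidle function, as the second half of the NOTE requires.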
