summaryrefslogtreecommitdiff
path: root/drivers/cpuidle
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-01-16 10:30:22 -0800
committerJakub Kicinski <kuba@kernel.org>2025-01-16 10:34:59 -0800
commit2ee738e90e80850582cbe10f34c6447965c1d87b (patch)
tree75a5d764d283bfac0648710f1e8c51680ef992fa /drivers/cpuidle
parentb44e27b4df1a1cd3fd84cf26c82156ed0301575f (diff)
parentce69b4019001407f9cd738dd2ba217b3a8ab831b (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.13-rc8). Conflicts: drivers/net/ethernet/realtek/r8169_main.c 1f691a1fc4be ("r8169: remove redundant hwmon support") 152d00a91396 ("r8169: simplify setting hwmon attribute visibility") https://lore.kernel.org/20250115122152.760b4e8d@canb.auug.org.au Adjacent changes: drivers/net/ethernet/broadcom/bnxt/bnxt.c 152f4da05aee ("bnxt_en: add support for rx-copybreak ethtool command") f0aa6a37a3db ("eth: bnxt: always recalculate features after XDP clearing, fix null-deref") drivers/net/ethernet/intel/ice/ice_type.h 50327223a8bb ("ice: add lock to protect low latency interface") dc26548d729e ("ice: Fix quad registers read on E825") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/cpuidle-riscv-sbi.c4
-rw-r--r--drivers/cpuidle/governors/teo.c91
2 files changed, 50 insertions, 45 deletions
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 14462c092039..0c92a628bbd4 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -504,12 +504,12 @@ static int sbi_cpuidle_probe(struct platform_device *pdev)
int cpu, ret;
struct cpuidle_driver *drv;
struct cpuidle_device *dev;
- struct device_node *np, *pds_node;
+ struct device_node *pds_node;
/* Detect OSI support based on CPU DT nodes */
sbi_cpuidle_use_osi = true;
for_each_possible_cpu(cpu) {
- np = of_cpu_device_node_get(cpu);
+ struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
if (np &&
of_property_present(np, "power-domains") &&
of_property_present(np, "power-domain-names")) {
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index f2992f92d8db..173ddcac540a 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -10,25 +10,27 @@
* DOC: teo-description
*
* The idea of this governor is based on the observation that on many systems
- * timer events are two or more orders of magnitude more frequent than any
- * other interrupts, so they are likely to be the most significant cause of CPU
- * wakeups from idle states. Moreover, information about what happened in the
- * (relatively recent) past can be used to estimate whether or not the deepest
- * idle state with target residency within the (known) time till the closest
- * timer event, referred to as the sleep length, is likely to be suitable for
- * the upcoming CPU idle period and, if not, then which of the shallower idle
- * states to choose instead of it.
+ * timer interrupts are two or more orders of magnitude more frequent than any
+ * other interrupt types, so they are likely to dominate CPU wakeup patterns.
+ * Moreover, in principle, the time when the next timer event is going to occur
+ * can be determined at the idle state selection time, although doing that may
+ * be costly, so it can be regarded as the most reliable source of information
+ * for idle state selection.
*
- * Of course, non-timer wakeup sources are more important in some use cases
- * which can be covered by taking a few most recent idle time intervals of the
- * CPU into account. However, even in that context it is not necessary to
- * consider idle duration values greater than the sleep length, because the
- * closest timer will ultimately wake up the CPU anyway unless it is woken up
- * earlier.
+ * Of course, non-timer wakeup sources are more important in some use cases,
+ * but even then it is generally unnecessary to consider idle duration values
+ * greater than the time time till the next timer event, referred as the sleep
+ * length in what follows, because the closest timer will ultimately wake up the
+ * CPU anyway unless it is woken up earlier.
*
- * Thus this governor estimates whether or not the prospective idle duration of
- * a CPU is likely to be significantly shorter than the sleep length and selects
- * an idle state for it accordingly.
+ * However, since obtaining the sleep length may be costly, the governor first
+ * checks if it can select a shallow idle state using wakeup pattern information
+ * from recent times, in which case it can do without knowing the sleep length
+ * at all. For this purpose, it counts CPU wakeup events and looks for an idle
+ * state whose target residency has not exceeded the idle duration (measured
+ * after wakeup) in the majority of relevant recent cases. If the target
+ * residency of that state is small enough, it may be used right away and the
+ * sleep length need not be determined.
*
* The computations carried out by this governor are based on using bins whose
* boundaries are aligned with the target residency parameter values of the CPU
@@ -39,7 +41,11 @@
* idle state 2, the third bin spans from the target residency of idle state 2
* up to, but not including, the target residency of idle state 3 and so on.
* The last bin spans from the target residency of the deepest idle state
- * supplied by the driver to infinity.
+ * supplied by the driver to the scheduler tick period length or to infinity if
+ * the tick period length is less than the target residency of that state. In
+ * the latter case, the governor also counts events with the measured idle
+ * duration between the tick period length and the target residency of the
+ * deepest idle state.
*
* Two metrics called "hits" and "intercepts" are associated with each bin.
* They are updated every time before selecting an idle state for the given CPU
@@ -49,47 +55,46 @@
* sleep length and the idle duration measured after CPU wakeup fall into the
* same bin (that is, the CPU appears to wake up "on time" relative to the sleep
* length). In turn, the "intercepts" metric reflects the relative frequency of
- * situations in which the measured idle duration is so much shorter than the
- * sleep length that the bin it falls into corresponds to an idle state
- * shallower than the one whose bin is fallen into by the sleep length (these
- * situations are referred to as "intercepts" below).
+ * non-timer wakeup events for which the measured idle duration falls into a bin
+ * that corresponds to an idle state shallower than the one whose bin is fallen
+ * into by the sleep length (these events are also referred to as "intercepts"
+ * below).
*
* In order to select an idle state for a CPU, the governor takes the following
* steps (modulo the possible latency constraint that must be taken into account
* too):
*
- * 1. Find the deepest CPU idle state whose target residency does not exceed
- * the current sleep length (the candidate idle state) and compute 2 sums as
- * follows:
+ * 1. Find the deepest enabled CPU idle state (the candidate idle state) and
+ * compute 2 sums as follows:
*
- * - The sum of the "hits" and "intercepts" metrics for the candidate state
- * and all of the deeper idle states (it represents the cases in which the
- * CPU was idle long enough to avoid being intercepted if the sleep length
- * had been equal to the current one).
+ * - The sum of the "hits" metric for all of the idle states shallower than
+ * the candidate one (it represents the cases in which the CPU was likely
+ * woken up by a timer).
*
- * - The sum of the "intercepts" metrics for all of the idle states shallower
- * than the candidate one (it represents the cases in which the CPU was not
- * idle long enough to avoid being intercepted if the sleep length had been
- * equal to the current one).
+ * - The sum of the "intercepts" metric for all of the idle states shallower
+ * than the candidate one (it represents the cases in which the CPU was
+ * likely woken up by a non-timer wakeup source).
*
- * 2. If the second sum is greater than the first one the CPU is likely to wake
- * up early, so look for an alternative idle state to select.
+ * 2. If the second sum computed in step 1 is greater than a half of the sum of
+ * both metrics for the candidate state bin and all subsequent bins(if any),
+ * a shallower idle state is likely to be more suitable, so look for it.
*
- * - Traverse the idle states shallower than the candidate one in the
+ * - Traverse the enabled idle states shallower than the candidate one in the
* descending order.
*
* - For each of them compute the sum of the "intercepts" metrics over all
* of the idle states between it and the candidate one (including the
* former and excluding the latter).
*
- * - If each of these sums that needs to be taken into account (because the
- * check related to it has indicated that the CPU is likely to wake up
- * early) is greater than a half of the corresponding sum computed in step
- * 1 (which means that the target residency of the state in question had
- * not exceeded the idle duration in over a half of the relevant cases),
- * select the given idle state instead of the candidate one.
+ * - If this sum is greater than a half of the second sum computed in step 1,
+ * use the given idle state as the new candidate one.
*
- * 3. By default, select the candidate state.
+ * 3. If the current candidate state is state 0 or its target residency is short
+ * enough, return it and prevent the scheduler tick from being stopped.
+ *
+ * 4. Obtain the sleep length value and check if it is below the target
+ * residency of the current candidate state, in which case a new shallower
+ * candidate state needs to be found, so look for it.
*/
#include <linux/cpuidle.h>