summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu18
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst18
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst8
-rw-r--r--Documentation/admin-guide/pm/index.rst2
-rw-r--r--Documentation/admin-guide/pm/intel_epb.rst41
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst32
-rw-r--r--Documentation/admin-guide/pm/sleep-states.rst8
-rw-r--r--Documentation/admin-guide/pm/strategies.rst8
-rw-r--r--Documentation/admin-guide/pm/system-wide.rst2
-rw-r--r--Documentation/admin-guide/pm/working-state.rst3
-rw-r--r--Documentation/driver-api/pm/cpuidle.rst7
-rw-r--r--Documentation/driver-api/pm/devices.rst12
-rw-r--r--Documentation/driver-api/pm/index.rst2
-rw-r--r--Documentation/driver-api/pm/notifiers.rst8
-rw-r--r--Documentation/driver-api/pm/types.rst2
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/common.c17
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/intel.c34
-rw-r--r--arch/x86/kernel/cpu/intel_epb.c216
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--drivers/acpi/processor_perflib.c2
-rw-r--r--drivers/base/power/domain.c118
-rw-r--r--drivers/base/power/domain_governor.c67
-rw-r--r--drivers/base/power/main.c70
-rw-r--r--drivers/base/power/wakeup.c6
-rw-r--r--drivers/cpufreq/Kconfig4
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c19
-rw-r--r--drivers/cpufreq/amd_freq_sensitivity.c2
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c22
-rw-r--r--drivers/cpufreq/armada-8k-cpufreq.c1
-rw-r--r--drivers/cpufreq/cpufreq.c100
-rw-r--r--drivers/cpufreq/cpufreq_governor.c2
-rw-r--r--drivers/cpufreq/cpufreq_stats.c15
-rw-r--r--drivers/cpufreq/freq_table.c3
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c4
-rw-r--r--drivers/cpufreq/intel_pstate.c65
-rw-r--r--drivers/cpufreq/kirkwood-cpufreq.c19
-rw-r--r--drivers/cpufreq/maple-cpufreq.c6
-rw-r--r--drivers/cpufreq/pasemi-cpufreq.c1
-rw-r--r--drivers/cpufreq/pmac32-cpufreq.c2
-rw-r--r--drivers/cpufreq/powernow-k8.c2
-rw-r--r--drivers/cpufreq/ppc_cbe_cpufreq.c1
-rw-r--r--drivers/cpufreq/qoriq-cpufreq.c2
-rw-r--r--drivers/cpufreq/speedstep-centrino.c2
-rw-r--r--drivers/cpuidle/cpuidle-exynos.c2
-rw-r--r--drivers/cpuidle/cpuidle.c19
-rw-r--r--drivers/devfreq/devfreq-event.c2
-rw-r--r--drivers/devfreq/devfreq.c90
-rw-r--r--drivers/devfreq/event/exynos-ppmu.c2
-rw-r--r--drivers/devfreq/event/rockchip-dfi.c25
-rw-r--r--drivers/devfreq/exynos-bus.c8
-rw-r--r--drivers/devfreq/rk3399_dmc.c73
-rw-r--r--drivers/devfreq/tegra-devfreq.c7
-rw-r--r--drivers/firmware/Kconfig15
-rw-r--r--drivers/firmware/Makefile3
-rw-r--r--drivers/firmware/psci/Kconfig13
-rw-r--r--drivers/firmware/psci/Makefile4
-rw-r--r--drivers/firmware/psci/psci.c (renamed from drivers/firmware/psci.c)110
-rw-r--r--drivers/firmware/psci/psci_checker.c (renamed from drivers/firmware/psci_checker.c)0
-rw-r--r--drivers/opp/core.c54
-rw-r--r--include/linux/cpufreq.h14
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cpuidle.h1
-rw-r--r--include/linux/pm_domain.h22
-rw-r--r--include/linux/pm_opp.h8
-rw-r--r--include/linux/suspend.h3
-rw-r--r--include/linux/tick.h7
-rw-r--r--include/soc/rockchip/rk3399_grf.h21
-rw-r--r--include/soc/rockchip/rockchip_sip.h1
-rw-r--r--include/trace/events/devfreq.h40
-rw-r--r--include/uapi/linux/psci.h7
-rw-r--r--kernel/power/hibernate.c5
-rw-r--r--kernel/power/main.c14
-rw-r--r--kernel/power/suspend.c13
-rw-r--r--kernel/power/user.c5
-rw-r--r--kernel/sched/cpufreq_schedutil.c21
-rw-r--r--kernel/time/tick-sched.c12
79 files changed, 1240 insertions, 390 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 5eea46fefcb2..4fb76c0e8d30 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -520,3 +520,21 @@ Description: Control Symetric Multi Threading (SMT)
If control status is "forceoff" or "notsupported" writes
are rejected.
+
+What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias
+Date: March 2019
+Contact: linux-pm@vger.kernel.org
+Description: Intel Energy and Performance Bias Hint (EPB)
+
+ EPB for the given CPU in a sliding scale 0 - 15, where a value
+ of 0 corresponds to a hint preference for highest performance
+ and a value of 15 corresponds to the maximum energy savings.
+
+ In order to change the EPB value for the CPU, write either
+ a number in the 0 - 15 sliding scale above, or one of the
+ strings: "performance", "balance-performance", "normal",
+ "balance-power", "power" (that represent values reflected by
+ their meaning), to this attribute.
+
+ This attribute is present for all online CPUs supporting the
+ Intel EPB feature.
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 7eca9026a9ed..0c74a7784964 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
@@ -5,9 +8,10 @@
CPU Performance Scaling
=======================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
The Concept of CPU Performance Scaling
======================================
@@ -396,8 +400,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to
the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn,
if it is invoked by the CFS scheduling class, the governor will use the
Per-Entity Load Tracking (PELT) metric for the root control group of the
-given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_
-LWN.net article for a description of the PELT mechanism). Then, the new
+given CPU as the CPU utilization estimate (see the *Per-entity load tracking*
+LWN.net article [1]_ for a description of the PELT mechanism). Then, the new
CPU frequency to apply is computed in accordance with the formula
f = 1.25 * ``f_0`` * ``util`` / ``max``
@@ -698,4 +702,8 @@ hardware feature (e.g. all Intel ones), even if the
:c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set.
-.. _Per-entity load tracking: https://lwn.net/Articles/531853/
+References
+==========
+
+.. [1] Jonathan Corbet, *Per-entity load tracking*,
+ https://lwn.net/Articles/531853/
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 9c58b35a81cb..e70b365dbc60 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
.. |cpufreq| replace:: :doc:`CPU Performance Scaling <cpufreq>`
@@ -5,9 +8,10 @@
CPU Idle Time Management
========================
-::
+:Copyright: |copy| 2018 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2018 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Concepts
========
diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst
index 49237ac73442..39f8f9f81e7a 100644
--- a/Documentation/admin-guide/pm/index.rst
+++ b/Documentation/admin-guide/pm/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
================
Power Management
================
diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst
new file mode 100644
index 000000000000..005121167af7
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_epb.rst
@@ -0,0 +1,41 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+======================================
+Intel Performance and Energy Bias Hint
+======================================
+
+:Copyright: |copy| 2019 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c
+ :doc: overview
+
+Intel Performance and Energy Bias Attribute in ``sysfs``
+========================================================
+
+The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU
+can be checked or updated through a ``sysfs`` attribute (file) under
+:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>``
+is allocated at the system initialization time:
+
+``energy_perf_bias``
+ Shows the current EPB value for the CPU in a sliding scale 0 - 15, where
+ a value of 0 corresponds to a hint preference for highest performance
+ and a value of 15 corresponds to the maximum energy savings.
+
+ In order to update the EPB value for the CPU, this attribute can be
+ written to, either with a number in the 0 - 15 sliding scale above, or
+ with one of the strings: "performance", "balance-performance", "normal",
+ "balance-power", "power" that represent values reflected by their
+ meaning.
+
+ This attribute is present for all online CPUs supporting the EPB
+ feature.
+
+Note that while the EPB interface to the processor is defined at the logical CPU
+level, the physical register backing it may be shared by multiple CPUs (for
+example, SMT siblings or cores in one package). For this reason, updating the
+EPB value for one CPU may cause the EPB values for other CPUs to change.
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ec0f7c111f65..67e414e34f37 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -1,10 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===============================================
``intel_pstate`` CPU Performance Scaling Driver
===============================================
-::
+:Copyright: |copy| 2017 Intel Corporation
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
General Information
@@ -20,11 +23,10 @@ you have not done that yet.]
For the processors supported by ``intel_pstate``, the P-state concept is broader
than just an operating frequency or an operating performance point (see the
-`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
+LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more
information about that). For this reason, the representation of P-states used
by ``intel_pstate`` internally follows the hardware specification (for details
-refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
-Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core
+refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core
uses frequencies for identifying operating performance points of CPUs and
frequencies are involved in the user space interface exposed by it, so
``intel_pstate`` maps its internal representation of P-states to frequencies too
@@ -561,9 +563,9 @@ or to pin every task potentially sensitive to them to a specific CPU.]
On the majority of systems supported by ``intel_pstate``, the ACPI tables
provided by the platform firmware contain ``_PSS`` objects returning information
-that can be used for CPU performance scaling (refer to the `ACPI specification`_
-for details on the ``_PSS`` objects and the format of the information returned
-by them).
+that can be used for CPU performance scaling (refer to the ACPI specification
+[3]_ for details on the ``_PSS`` objects and the format of the information
+returned by them).
The information returned by the ACPI ``_PSS`` objects is used by the
``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate``
@@ -728,6 +730,14 @@ P-state is called, the ``ftrace`` filter can be set to to
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
-.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
-.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
-.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+References
+==========
+
+.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*,
+ http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+
+.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*,
+ http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+
+.. [3] *Advanced Configuration and Power Interface Specification*,
+ https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst
index dbf5acd49f35..cd3a28cb81f4 100644
--- a/Documentation/admin-guide/pm/sleep-states.rst
+++ b/Documentation/admin-guide/pm/sleep-states.rst
@@ -1,10 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===================
System Sleep States
===================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Sleep states are global low-power states of the entire system in which user
space code cannot be executed and the overall system activity is significantly
diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst
index afe4d3f831fe..dd0362e32fa5 100644
--- a/Documentation/admin-guide/pm/strategies.rst
+++ b/Documentation/admin-guide/pm/strategies.rst
@@ -1,10 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
===========================
Power Management Strategies
===========================
-::
+:Copyright: |copy| 2017 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
The Linux kernel supports two major high-level power management strategies.
diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst
index 0c81e4c5de39..2b1f987b34f0 100644
--- a/Documentation/admin-guide/pm/system-wide.rst
+++ b/Documentation/admin-guide/pm/system-wide.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
============================
System-Wide Power Management
============================
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index b6cef9b5e961..fc298eb1234b 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
==============================
Working-State Power Management
==============================
@@ -8,3 +10,4 @@ Working-State Power Management
cpuidle
cpufreq
intel_pstate
+ intel_epb
diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst
index 5842ab621a58..006cf6db40c6 100644
--- a/Documentation/driver-api/pm/cpuidle.rst
+++ b/Documentation/driver-api/pm/cpuidle.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
.. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor <cpuidle_governor>`
.. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device <cpuidle_device>`
.. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver <cpuidle_driver>`
@@ -7,9 +10,9 @@
CPU Idle Time Management
========================
-::
+:Copyright: |copy| 2019 Intel Corporation
- Copyright (c) 2019 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
CPU Idle Time Management Subsystem
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 090c151aa86b..30835683616a 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
.. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>`
.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
.. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>`
@@ -12,11 +15,12 @@
Device Power Management Basics
==============================
-::
+:Copyright: |copy| 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+:Copyright: |copy| 2010 Alan Stern <stern@rowland.harvard.edu>
+:Copyright: |copy| 2016 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
- Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
- Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Most of the code in Linux is device drivers, so most of the Linux power
management (PM) code is also driver-specific. Most drivers will do very
diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst
index 56975c6bc789..c2a9ef8d115c 100644
--- a/Documentation/driver-api/pm/index.rst
+++ b/Documentation/driver-api/pm/index.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
===============================
CPU and Device Power Management
===============================
diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst
index 62f860026992..186435c43b77 100644
--- a/Documentation/driver-api/pm/notifiers.rst
+++ b/Documentation/driver-api/pm/notifiers.rst
@@ -1,10 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
=============================
Suspend/Hibernation Notifiers
=============================
-::
+:Copyright: |copy| 2016 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
There are some operations that subsystems or drivers may want to carry out
before hibernation/suspend or after restore/resume, but they require the system
diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst
index 3ebdecc54104..73a231caf764 100644
--- a/Documentation/driver-api/pm/types.rst
+++ b/Documentation/driver-api/pm/types.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
==================================
Device Power Management Data Types
==================================
diff --git a/MAINTAINERS b/MAINTAINERS
index d846ccd81235..bd40a852207b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4553,6 +4553,7 @@ S: Maintained
F: drivers/devfreq/
F: include/linux/devfreq.h
F: Documentation/devicetree/bindings/devfreq/
+F: include/trace/events/devfreq.h
DEVICE FREQUENCY EVENT (DEVFREQ-EVENT)
M: Chanwoo Choi <cw00.choi@samsung.com>
@@ -12416,7 +12417,7 @@ M: Mark Rutland <mark.rutland@arm.com>
M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
L: linux-arm-kernel@lists.infradead.org
S: Maintained
-F: drivers/firmware/psci*.c
+F: drivers/firmware/psci/
F: include/linux/psci.h
F: include/uapi/linux/psci.h
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfd24f9f7614..1796d2bdcaaa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37f7d438a6ef..37640544e12f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1824,23 +1824,6 @@ void cpu_init(void)
}
#endif
-static void bsp_resume(void)
-{
- if (this_cpu->c_bsp_resume)
- this_cpu->c_bsp_resume(&boot_cpu_data);
-}
-
-static struct syscore_ops cpu_syscore_ops = {
- .resume = bsp_resume,
-};
-
-static int __init init_cpu_syscore(void)
-{
- register_syscore_ops(&cpu_syscore_ops);
- return 0;
-}
-core_initcall(init_cpu_syscore);
-
/*
* The microcode loader calls this upon late microcode load to recheck features,
* only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 5eb946b9a9f3..c0e2407abdd6 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -14,7 +14,6 @@ struct cpu_dev {
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
void (*c_detect_tlb)(struct cpuinfo_x86 *);
- void (*c_bsp_resume)(struct cpuinfo_x86 *);
int c_x86_vendor;
#ifdef CONFIG_X86_32
/* Optional vendor specific routine to obtain the cache size. */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3142fd7a9b32..f17c1a714779 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -596,36 +596,6 @@ detect_keyid_bits:
c->x86_phys_bits -= keyid_bits;
}
-static void init_intel_energy_perf(struct cpuinfo_x86 *c)
-{
- u64 epb;
-
- /*
- * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
- * (x86_energy_perf_policy(8) is available to change it at run-time.)
- */
- if (!cpu_has(c, X86_FEATURE_EPB))
- return;
-
- rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
- if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
- return;
-
- pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
- epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
- wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
-}
-
-static void intel_bsp_resume(struct cpuinfo_x86 *c)
-{
- /*
- * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume,
- * so reinitialize it properly like during bootup:
- */
- init_intel_energy_perf(c);
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_TME))
detect_tme(c);
- init_intel_energy_perf(c);
-
init_intel_misc_features(c);
}
@@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = {
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
.c_init = init_intel,
- .c_bsp_resume = intel_bsp_resume,
.c_x86_vendor = X86_VENDOR_INTEL,
};
cpu_dev_register(intel_cpu_dev);
-
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
new file mode 100644
index 000000000000..f4dd73396f28
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Performance and Energy Bias Hint support.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/syscore_ops.h>
+#include <linux/pm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+
+/**
+ * DOC: overview
+ *
+ * The Performance and Energy Bias Hint (EPB) allows software to specify its
+ * preference with respect to the power-performance tradeoffs present in the
+ * processor. Generally, the EPB is expected to be set by user space (directly
+ * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
+ * two reasons for the kernel to update it.
+ *
+ * First, there are systems where the platform firmware resets the EPB during
+ * system-wide transitions from sleep states back into the working state
+ * effectively causing the previous EPB updates by user space to be lost.
+ * Thus the kernel needs to save the current EPB values for all CPUs during
+ * system-wide transitions to sleep states and restore them on the way back to
+ * the working state. That can be achieved by saving EPB for secondary CPUs
+ * when they are taken offline during transitions into system sleep states and
+ * for the boot CPU in a syscore suspend operation, so that it can be restored
+ * for the boot CPU in a syscore resume operation and for the other CPUs when
+ * they are brought back online. However, CPUs that are already offline when
+ * a system-wide PM transition is started are not taken offline again, but their
+ * EPB values may still be reset by the platform firmware during the transition,
+ * so in fact it is necessary to save the EPB of any CPU taken offline and to
+ * restore it when the given CPU goes back online at all times.
+ *
+ * Second, on many systems the initial EPB value coming from the platform
+ * firmware is 0 ('performance') and at least on some of them that is because
+ * the platform firmware does not initialize EPB at all with the assumption that
+ * the OS will do that anyway. That sometimes is problematic, as it may cause
+ * the system battery to drain too fast, for example, so it is better to adjust
+ * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
+ * kernel changes it to 6 ('normal').
+ */
+
+static DEFINE_PER_CPU(u8, saved_epb);
+
+#define EPB_MASK 0x0fULL
+#define EPB_SAVED 0x10ULL
+#define MAX_EPB EPB_MASK
+
+static int intel_epb_save(void)
+{
+ u64 epb;
+
+ rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ /*
+ * Ensure that saved_epb will always be nonzero after this write even if
+ * the EPB value read from the MSR is 0.
+ */
+ this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
+
+ return 0;
+}
+
+static void intel_epb_restore(void)
+{
+ u64 val = this_cpu_read(saved_epb);
+ u64 epb;
+
+ rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ if (val) {
+ val &= EPB_MASK;
+ } else {
+ /*
+ * Because intel_epb_save() has not run for the current CPU yet,
+ * it is going online for the first time, so if its EPB value is
+ * 0 ('performance') at this point, assume that it has not been
+ * initialized by the platform firmware and set it to 6
+ * ('normal').
+ */
+ val = epb & EPB_MASK;
+ if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
+ val = ENERGY_PERF_BIAS_NORMAL;
+ pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ }
+ }
+ wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
+}
+
+static struct syscore_ops intel_epb_syscore_ops = {
+ .suspend = intel_epb_save,
+ .resume = intel_epb_restore,
+};
+
+static const char * const energy_perf_strings[] = {
+ "performance",
+ "balance-performance",
+ "normal",
+ "balance-power",
+ "power"
+};
+static const u8 energ_perf_values[] = {
+ ENERGY_PERF_BIAS_PERFORMANCE,
+ ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
+ ENERGY_PERF_BIAS_NORMAL,
+ ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
+ ENERGY_PERF_BIAS_POWERSAVE
+};
+
+static ssize_t energy_perf_bias_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ unsigned int cpu = dev->id;
+ u64 epb;
+ int ret;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%llu\n", epb);
+}
+
+static ssize_t energy_perf_bias_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int cpu = dev->id;
+ u64 epb, val;
+ int ret;
+
+ ret = __sysfs_match_string(energy_perf_strings,
+ ARRAY_SIZE(energy_perf_strings), buf);
+ if (ret >= 0)
+ val = energ_perf_values[ret];
+ else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
+ return -EINVAL;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret < 0)
+ return ret;
+
+ ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
+ (epb & ~EPB_MASK) | val);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(energy_perf_bias);
+
+static struct attribute *intel_epb_attrs[] = {
+ &dev_attr_energy_perf_bias.attr,
+ NULL
+};
+
+static const struct attribute_group intel_epb_attr_group = {
+ .name = power_group_name,
+ .attrs = intel_epb_attrs
+};
+
+static int intel_epb_online(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ intel_epb_restore();
+ if (!cpuhp_tasks_frozen)
+ sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
+
+ return 0;
+}
+
+static int intel_epb_offline(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ if (!cpuhp_tasks_frozen)
+ sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
+
+ intel_epb_save();
+ return 0;
+}
+
+static __init int intel_epb_init(void)
+{
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_EPB))
+ return -ENODEV;
+
+ ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
+ "x86/intel/epb:online", intel_epb_online,
+ intel_epb_offline);
+ if (ret < 0)
+ goto err_out_online;
+
+ register_syscore_ops(&intel_epb_syscore_ops);
+ return 0;
+
+err_out_online:
+ cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
+ return ret;
+}
+subsys_initcall(intel_epb_init);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index aab0c82e0a0d..15b5e98a86f9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void)
/*
* Secondary CPUs do not run through tsc_init(), so set up
* all the scale factors for all CPUs, assuming the same
- * speed as the bootup CPU. (cpufreq notifiers will fix this
- * up if their speed diverges)
+ * speed as the bootup CPU.
*/
static void __init cyc2ns_init_secondary_cpus(void)
{
@@ -940,12 +939,12 @@ void tsc_restore_sched_clock_state(void)
}
#ifdef CONFIG_CPU_FREQ
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+/*
+ * Frequency scaling support. Adjust the TSC based timer when the CPU frequency
* changes.
*
- * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
- * not that important because current Opteron setups do not support
- * scaling on SMP anyroads.
+ * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC
+ * as unstable and give up in those cases.
*
* Should fix up last_tsc too. Currently gettimeofday in the
* first tick after the change will be slightly wrong.
@@ -959,22 +958,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
- unsigned long *lpj;
- lpj = &boot_cpu_data.loops_per_jiffy;
-#ifdef CONFIG_SMP
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- lpj = &cpu_data(freq->cpu).loops_per_jiffy;
-#endif
+ if (num_online_cpus() > 1) {
+ mark_tsc_unstable("cpufreq changes on SMP");
+ return 0;
+ }
if (!ref_freq) {
ref_freq = freq->old;
- loops_per_jiffy_ref = *lpj;
+ loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
tsc_khz_ref = tsc_khz;
}
+
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
- *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+ boot_cpu_data.loops_per_jiffy =
+ cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index a303fd0e108c..c73d3a62799a 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -181,7 +181,7 @@ void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag)
acpi_processor_ppc_ost(pr->handle, 0);
}
if (ret >= 0)
- cpufreq_update_policy(pr->id);
+ cpufreq_update_limits(pr->id);
}
int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 96a6dc9d305c..3d899e8abd58 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -22,6 +22,7 @@
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/export.h>
+#include <linux/cpu.h>
#include "power.h"
@@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
#define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE)
#define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON)
#define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP)
+#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
const struct generic_pm_domain *genpd)
@@ -391,11 +393,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
if (unlikely(!genpd->set_performance_state))
return -EINVAL;
- if (unlikely(!dev->power.subsys_data ||
- !dev->power.subsys_data->domain_data)) {
- WARN_ON(1);
+ if (WARN_ON(!dev->power.subsys_data ||
+ !dev->power.subsys_data->domain_data))
return -EINVAL;
- }
genpd_lock(genpd);
@@ -1396,8 +1396,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron);
#endif /* CONFIG_PM_SLEEP */
-static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
- struct gpd_timing_data *td)
+static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
{
struct generic_pm_domain_data *gpd_data;
int ret;
@@ -1412,9 +1411,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
goto err_put;
}
- if (td)
- gpd_data->td = *td;
-
gpd_data->base.dev = dev;
gpd_data->td.constraint_changed = true;
gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
@@ -1454,8 +1450,57 @@ static void genpd_free_dev_data(struct device *dev,
dev_pm_put_subsys_data(dev);
}
+static void genpd_update_cpumask(struct generic_pm_domain *genpd,
+ int cpu, bool set, unsigned int depth)
+{
+ struct gpd_link *link;
+
+ if (!genpd_is_cpu_domain(genpd))
+ return;
+
+ list_for_each_entry(link, &genpd->slave_links, slave_node) {
+ struct generic_pm_domain *master = link->master;
+
+ genpd_lock_nested(master, depth + 1);
+ genpd_update_cpumask(master, cpu, set, depth + 1);
+ genpd_unlock(master);
+ }
+
+ if (set)
+ cpumask_set_cpu(cpu, genpd->cpus);
+ else
+ cpumask_clear_cpu(cpu, genpd->cpus);
+}
+
+static void genpd_set_cpumask(struct generic_pm_domain *genpd, int cpu)
+{
+ if (cpu >= 0)
+ genpd_update_cpumask(genpd, cpu, true, 0);
+}
+
+static void genpd_clear_cpumask(struct generic_pm_domain *genpd, int cpu)
+{
+ if (cpu >= 0)
+ genpd_update_cpumask(genpd, cpu, false, 0);
+}
+
+static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev)
+{
+ int cpu;
+
+ if (!genpd_is_cpu_domain(genpd))
+ return -1;
+
+ for_each_possible_cpu(cpu) {
+ if (get_cpu_device(cpu) == dev)
+ return cpu;
+ }
+
+ return -1;
+}
+
static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
- struct gpd_timing_data *td)
+ struct device *base_dev)
{
struct generic_pm_domain_data *gpd_data;
int ret;
@@ -1465,16 +1510,19 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
return -EINVAL;
- gpd_data = genpd_alloc_dev_data(dev, td);
+ gpd_data = genpd_alloc_dev_data(dev);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
+ gpd_data->cpu = genpd_get_cpu(genpd, base_dev);
+
ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0;
if (ret)
goto out;
genpd_lock(genpd);
+ genpd_set_cpumask(genpd, gpd_data->cpu);
dev_pm_domain_set(dev, &genpd->domain);
genpd->device_count++;
@@ -1502,7 +1550,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
int ret;
mutex_lock(&gpd_list_lock);
- ret = genpd_add_device(genpd, dev, NULL);
+ ret = genpd_add_device(genpd, dev, dev);
mutex_unlock(&gpd_list_lock);
return ret;
@@ -1532,6 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
genpd->device_count--;
genpd->max_off_time_changed = true;
+ genpd_clear_cpumask(genpd, gpd_data->cpu);
dev_pm_domain_set(dev, NULL);
list_del_init(&pdd->list_node);
@@ -1686,6 +1735,12 @@ out:
}
EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
+static void genpd_free_default_power_state(struct genpd_power_state *states,
+ unsigned int state_count)
+{
+ kfree(states);
+}
+
static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
{
struct genpd_power_state *state;
@@ -1696,7 +1751,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
genpd->states = state;
genpd->state_count = 1;
- genpd->free = state;
+ genpd->free_states = genpd_free_default_power_state;
return 0;
}
@@ -1762,11 +1817,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
return -EINVAL;
+ if (genpd_is_cpu_domain(genpd) &&
+ !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
+ return -ENOMEM;
+
/* Use only one "off" state if there were no states declared */
if (genpd->state_count == 0) {
ret = genpd_set_default_power_state(genpd);
- if (ret)
+ if (ret) {
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
return ret;
+ }
} else if (!gov && genpd->state_count > 1) {
pr_warn("%s: no governor for states\n", genpd->name);
}
@@ -1812,7 +1874,11 @@ static int genpd_remove(struct generic_pm_domain *genpd)
list_del(&genpd->gpd_list_node);
genpd_unlock(genpd);
cancel_work_sync(&genpd->power_off_work);
- kfree(genpd->free);
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ if (genpd->free_states)
+ genpd->free_states(genpd->states, genpd->state_count);
+
pr_debug("%s: removed %s\n", __func__, genpd->name);
return 0;
@@ -2190,7 +2256,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
goto out;
}
- ret = genpd_add_device(genpd, dev, NULL);
+ ret = genpd_add_device(genpd, dev, dev);
out:
mutex_unlock(&gpd_list_lock);
@@ -2274,6 +2340,7 @@ EXPORT_SYMBOL_GPL(of_genpd_remove_last);
static void genpd_release_dev(struct device *dev)
{
+ of_node_put(dev->of_node);
kfree(dev);
}
@@ -2335,14 +2402,14 @@ static void genpd_dev_pm_sync(struct device *dev)
genpd_queue_power_off_work(pd);
}
-static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
+static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
unsigned int index, bool power_on)
{
struct of_phandle_args pd_args;
struct generic_pm_domain *pd;
int ret;
- ret = of_parse_phandle_with_args(np, "power-domains",
+ ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells", index, &pd_args);
if (ret < 0)
return ret;
@@ -2354,12 +2421,12 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
mutex_unlock(&gpd_list_lock);
dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
__func__, PTR_ERR(pd));
- return driver_deferred_probe_check_state(dev);
+ return driver_deferred_probe_check_state(base_dev);
}
dev_dbg(dev, "adding to PM domain %s\n", pd->name);
- ret = genpd_add_device(pd, dev, NULL);
+ ret = genpd_add_device(pd, dev, base_dev);
mutex_unlock(&gpd_list_lock);
if (ret < 0) {
@@ -2410,7 +2477,7 @@ int genpd_dev_pm_attach(struct device *dev)
"#power-domain-cells") != 1)
return 0;
- return __genpd_dev_pm_attach(dev, dev->of_node, 0, true);
+ return __genpd_dev_pm_attach(dev, dev, 0, true);
}
EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
@@ -2440,10 +2507,10 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
if (!dev->of_node)
return NULL;
- /* Deal only with devices using multiple PM domains. */
+ /* Verify that the index is within a valid range. */
num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells");
- if (num_domains < 2 || index >= num_domains)
+ if (index >= num_domains)
return NULL;
/* Allocate and register device on the genpd bus. */
@@ -2454,15 +2521,16 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
dev_set_name(virt_dev, "genpd:%u:%s", index, dev_name(dev));
virt_dev->bus = &genpd_bus_type;
virt_dev->release = genpd_release_dev;
+ virt_dev->of_node = of_node_get(dev->of_node);
ret = device_register(virt_dev);
if (ret) {
- kfree(virt_dev);
+ put_device(virt_dev);
return ERR_PTR(ret);
}
/* Try to attach the device to the PM domain at the specified index. */
- ret = __genpd_dev_pm_attach(virt_dev, dev->of_node, index, false);
+ ret = __genpd_dev_pm_attach(virt_dev, dev, index, false);
if (ret < 1) {
device_unregister(virt_dev);
return ret ? ERR_PTR(ret) : NULL;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 4d07e38a8247..7912bc957244 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -10,6 +10,9 @@
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/hrtimer.h>
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/ktime.h>
static int dev_update_qos_constraint(struct device *dev, void *data)
{
@@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
struct generic_pm_domain *genpd = pd_to_genpd(pd);
struct gpd_link *link;
- if (!genpd->max_off_time_changed)
+ if (!genpd->max_off_time_changed) {
+ genpd->state_idx = genpd->cached_power_down_state_idx;
return genpd->cached_power_down_ok;
+ }
/*
* We have to invalidate the cached results for the masters, so
@@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
genpd->state_idx--;
}
+ genpd->cached_power_down_state_idx = genpd->state_idx;
return genpd->cached_power_down_ok;
}
@@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain)
return false;
}
+#ifdef CONFIG_CPU_IDLE
+static bool cpu_power_down_ok(struct dev_pm_domain *pd)
+{
+ struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ struct cpuidle_device *dev;
+ ktime_t domain_wakeup, next_hrtimer;
+ s64 idle_duration_ns;
+ int cpu, i;
+
+ /* Validate dev PM QoS constraints. */
+ if (!default_power_down_ok(pd))
+ return false;
+
+ if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
+ return true;
+
+ /*
+ * Find the next wakeup for any of the online CPUs within the PM domain
+ * and its subdomains. Note, we only need the genpd->cpus, as it already
+ * contains a mask of all CPUs from subdomains.
+ */
+ domain_wakeup = ktime_set(KTIME_SEC_MAX, 0);
+ for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) {
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (dev) {
+ next_hrtimer = READ_ONCE(dev->next_hrtimer);
+ if (ktime_before(next_hrtimer, domain_wakeup))
+ domain_wakeup = next_hrtimer;
+ }
+ }
+
+ /* The minimum idle duration is from now - until the next wakeup. */
+ idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get()));
+ if (idle_duration_ns <= 0)
+ return false;
+
+ /*
+ * Find the deepest idle state that has its residency value satisfied
+ * and by also taking into account the power off latency for the state.
+ * Start at the state picked by the dev PM QoS constraint validation.
+ */
+ i = genpd->state_idx;
+ do {
+ if (idle_duration_ns >= (genpd->states[i].residency_ns +
+ genpd->states[i].power_off_latency_ns)) {
+ genpd->state_idx = i;
+ return true;
+ }
+ } while (--i >= 0);
+
+ return false;
+}
+
+struct dev_power_governor pm_domain_cpu_gov = {
+ .suspend_ok = default_suspend_ok,
+ .power_down_ok = cpu_power_down_ok,
+};
+#endif
+
struct dev_power_governor simple_qos_governor = {
.suspend_ok = default_suspend_ok,
.power_down_ok = default_power_down_ok,
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index f80d298de3fa..43e863cc0c1b 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -478,7 +478,7 @@ struct dpm_watchdog {
/**
* dpm_watchdog_handler - Driver suspend / resume watchdog handler.
- * @data: Watchdog object address.
+ * @t: The timer that PM watchdog depends on.
*
* Called when a driver has timed out suspending or resuming.
* There's not much we can do here to recover so panic() to
@@ -706,6 +706,19 @@ static bool is_async(struct device *dev)
&& !pm_trace_is_enabled();
}
+static bool dpm_async_fn(struct device *dev, async_func_t func)
+{
+ reinit_completion(&dev->power.completion);
+
+ if (is_async(dev)) {
+ get_device(dev);
+ async_schedule(func, dev);
+ return true;
+ }
+
+ return false;
+}
+
static void async_resume_noirq(void *data, async_cookie_t cookie)
{
struct device *dev = (struct device *)data;
@@ -732,13 +745,8 @@ void dpm_noirq_resume_devices(pm_message_t state)
* in case the starting of async threads is
* delayed by non-async resuming devices.
*/
- list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume_noirq, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_noirq_list, power.entry)
+ dpm_async_fn(dev, async_resume_noirq);
while (!list_empty(&dpm_noirq_list)) {
dev = to_device(dpm_noirq_list.next);
@@ -889,13 +897,8 @@ void dpm_resume_early(pm_message_t state)
* in case the starting of async threads is
* delayed by non-async resuming devices.
*/
- list_for_each_entry(dev, &dpm_late_early_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume_early, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_late_early_list, power.entry)
+ dpm_async_fn(dev, async_resume_early);
while (!list_empty(&dpm_late_early_list)) {
dev = to_device(dpm_late_early_list.next);
@@ -1053,13 +1056,8 @@ void dpm_resume(pm_message_t state)
pm_transition = state;
async_error = 0;
- list_for_each_entry(dev, &dpm_suspended_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_suspended_list, power.entry)
+ dpm_async_fn(dev, async_resume);
while (!list_empty(&dpm_suspended_list)) {
dev = to_device(dpm_suspended_list.next);
@@ -1373,13 +1371,9 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie)
static int device_suspend_noirq(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend_noirq, dev);
+ if (dpm_async_fn(dev, async_suspend_noirq))
return 0;
- }
+
return __device_suspend_noirq(dev, pm_transition, false);
}
@@ -1576,13 +1570,8 @@ static void async_suspend_late(void *data, async_cookie_t cookie)
static int device_suspend_late(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend_late, dev);
+ if (dpm_async_fn(dev, async_suspend_late))
return 0;
- }
return __device_suspend_late(dev, pm_transition, false);
}
@@ -1747,6 +1736,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
if (dev->power.syscore)
goto Complete;
+ /* Avoid direct_complete to let wakeup_path propagate. */
+ if (device_may_wakeup(dev) || dev->power.wakeup_path)
+ dev->power.direct_complete = false;
+
if (dev->power.direct_complete) {
if (pm_runtime_status_suspended(dev)) {
pm_runtime_disable(dev);
@@ -1842,13 +1835,8 @@ static void async_suspend(void *data, async_cookie_t cookie)
static int device_suspend(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend, dev);
+ if (dpm_async_fn(dev, async_suspend))
return 0;
- }
return __device_suspend(dev, pm_transition, false);
}
@@ -2069,8 +2057,8 @@ EXPORT_SYMBOL_GPL(__suspend_report_result);
/**
* device_pm_wait_for_dev - Wait for suspend/resume of a device to complete.
- * @dev: Device to wait for.
* @subordinate: Device that needs to wait for @dev.
+ * @dev: Device to wait for.
*/
int device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
{
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index bb1ae175fae1..23c243a4c675 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -804,7 +804,7 @@ void pm_print_active_wakeup_sources(void)
srcuidx = srcu_read_lock(&wakeup_srcu);
list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
if (ws->active) {
- pr_debug("active wakeup source: %s\n", ws->name);
+ pm_pr_dbg("active wakeup source: %s\n", ws->name);
active = 1;
} else if (!active &&
(!last_activity_ws ||
@@ -815,7 +815,7 @@ void pm_print_active_wakeup_sources(void)
}
if (!active && last_activity_ws)
- pr_debug("last active wakeup source: %s\n",
+ pm_pr_dbg("last active wakeup source: %s\n",
last_activity_ws->name);
srcu_read_unlock(&wakeup_srcu, srcuidx);
}
@@ -845,7 +845,7 @@ bool pm_wakeup_pending(void)
raw_spin_unlock_irqrestore(&events_lock, flags);
if (ret) {
- pr_debug("Wakeup pending, aborting suspend\n");
+ pm_pr_dbg("Wakeup pending, aborting suspend\n");
pm_print_active_wakeup_sources();
}
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index b22e6bba71f1..4d2b33a30292 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -26,10 +26,6 @@ config CPU_FREQ_GOV_COMMON
select IRQ_WORK
bool
-config CPU_FREQ_BOOST_SW
- bool
- depends on THERMAL
-
config CPU_FREQ_STAT
bool "CPU frequency transition statistics"
help
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index c72258a44ba4..73bb2aafb1a8 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -366,7 +366,7 @@ static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *dat
val = drv_read(data, mask);
- pr_debug("get_cur_val = %u\n", val);
+ pr_debug("%s = %u\n", __func__, val);
return val;
}
@@ -378,7 +378,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
unsigned int freq;
unsigned int cached_freq;
- pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
+ pr_debug("%s (%d)\n", __func__, cpu);
policy = cpufreq_cpu_get_raw(cpu);
if (unlikely(!policy))
@@ -458,8 +458,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
if (acpi_pstate_strict) {
if (!check_freqs(policy, mask,
policy->freq_table[index].frequency)) {
- pr_debug("acpi_cpufreq_target failed (%d)\n",
- policy->cpu);
+ pr_debug("%s (%d)\n", __func__, policy->cpu);
result = -EAGAIN;
}
}
@@ -573,7 +572,7 @@ static int cpufreq_boost_down_prep(unsigned int cpu)
static int __init acpi_cpufreq_early_init(void)
{
unsigned int i;
- pr_debug("acpi_cpufreq_early_init\n");
+ pr_debug("%s\n", __func__);
acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
if (!acpi_perf_data) {
@@ -657,7 +656,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
static int blacklisted;
#endif
- pr_debug("acpi_cpufreq_cpu_init\n");
+ pr_debug("%s\n", __func__);
#ifdef CONFIG_SMP
if (blacklisted)
@@ -856,7 +855,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
struct acpi_cpufreq_data *data = policy->driver_data;
- pr_debug("acpi_cpufreq_cpu_exit\n");
+ pr_debug("%s\n", __func__);
policy->fast_switch_possible = false;
policy->driver_data = NULL;
@@ -881,7 +880,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
struct acpi_cpufreq_data *data = policy->driver_data;
- pr_debug("acpi_cpufreq_resume\n");
+ pr_debug("%s\n", __func__);
data->resume = 1;
@@ -954,7 +953,7 @@ static int __init acpi_cpufreq_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
- pr_debug("acpi_cpufreq_init\n");
+ pr_debug("%s\n", __func__);
ret = acpi_cpufreq_early_init();
if (ret)
@@ -991,7 +990,7 @@ static int __init acpi_cpufreq_init(void)
static void __exit acpi_cpufreq_exit(void)
{
- pr_debug("acpi_cpufreq_exit\n");
+ pr_debug("%s\n", __func__);
acpi_cpufreq_boost_exit();
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index 4ac7c3cf34be..6927a8c0e748 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -124,7 +124,7 @@ static int __init amd_freq_sensitivity_init(void)
PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, NULL);
if (!pcidev) {
- if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK))
+ if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK))
return -ENODEV;
}
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index 75491fc841a6..0df16eb1eb3c 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void)
struct armada_37xx_dvfs *dvfs;
struct platform_device *pdev;
unsigned long freq;
- unsigned int cur_frequency;
+ unsigned int cur_frequency, base_frequency;
struct regmap *nb_pm_base, *avs_base;
struct device *cpu_dev;
int load_lvl, ret;
- struct clk *clk;
+ struct clk *clk, *parent;
nb_pm_base =
syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm");
@@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void)
return PTR_ERR(clk);
}
+ parent = clk_get_parent(clk);
+ if (IS_ERR(parent)) {
+ dev_err(cpu_dev, "Cannot get parent clock for CPU0\n");
+ clk_put(clk);
+ return PTR_ERR(parent);
+ }
+
+ /* Get parent CPU frequency */
+ base_frequency = clk_get_rate(parent);
+
+ if (!base_frequency) {
+ dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n");
+ clk_put(clk);
+ return -EINVAL;
+ }
+
/* Get nominal (current) CPU frequency */
cur_frequency = clk_get_rate(clk);
if (!cur_frequency) {
@@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
load_lvl++) {
unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000;
- freq = cur_frequency / dvfs->divider[load_lvl];
+ freq = base_frequency / dvfs->divider[load_lvl];
ret = dev_pm_opp_add(cpu_dev, freq, u_volt);
if (ret)
goto remove_opp;
diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c
index b3f4bd647e9b..988ebc326bdb 100644
--- a/drivers/cpufreq/armada-8k-cpufreq.c
+++ b/drivers/cpufreq/armada-8k-cpufreq.c
@@ -132,6 +132,7 @@ static int __init armada_8k_cpufreq_init(void)
of_node_put(node);
return -ENODEV;
}
+ of_node_put(node);
nb_cpus = num_possible_cpus();
freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e10922709d13..7ea217c88c2e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -34,11 +34,6 @@
static LIST_HEAD(cpufreq_policy_list);
-static inline bool policy_is_inactive(struct cpufreq_policy *policy)
-{
- return cpumask_empty(policy->cpus);
-}
-
/* Macros to iterate over CPU policies */
#define for_each_suitable_policy(__policy, __active) \
list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \
@@ -250,6 +245,51 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy)
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
+/**
+ * cpufreq_cpu_release - Unlock a policy and decrement its usage counter.
+ * @policy: cpufreq policy returned by cpufreq_cpu_acquire().
+ */
+void cpufreq_cpu_release(struct cpufreq_policy *policy)
+{
+ if (WARN_ON(!policy))
+ return;
+
+ lockdep_assert_held(&policy->rwsem);
+
+ up_write(&policy->rwsem);
+
+ cpufreq_cpu_put(policy);
+}
+
+/**
+ * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it.
+ * @cpu: CPU to find the policy for.
+ *
+ * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and
+ * if the policy returned by it is not NULL, acquire its rwsem for writing.
+ * Return the policy if it is active or release it and return NULL otherwise.
+ *
+ * The policy returned by this function has to be released with the help of
+ * cpufreq_cpu_release() in order to release its rwsem and balance its usage
+ * counter properly.
+ */
+struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+ if (!policy)
+ return NULL;
+
+ down_write(&policy->rwsem);
+
+ if (policy_is_inactive(policy)) {
+ cpufreq_cpu_release(policy);
+ return NULL;
+ }
+
+ return policy;
+}
+
/*********************************************************************
* EXTERNALLY AFFECTING FREQUENCY CHANGES *
*********************************************************************/
@@ -669,9 +709,6 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf)
return ret;
}
-static int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy);
-
/**
* cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
*/
@@ -857,11 +894,9 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
unsigned int limit;
int ret;
- if (cpufreq_driver->bios_limit) {
- ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
- if (!ret)
- return sprintf(buf, "%u\n", limit);
- }
+ ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+ if (!ret)
+ return sprintf(buf, "%u\n", limit);
return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}
@@ -1098,6 +1133,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
cpufreq_global_kobject, "policy%u", cpu);
if (ret) {
pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
+ kobject_put(&policy->kobj);
goto err_free_real_cpus;
}
@@ -1550,7 +1586,7 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy)
{
unsigned int ret_freq = 0;
- if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get)
+ if (unlikely(policy_is_inactive(policy)))
return ret_freq;
ret_freq = cpufreq_driver->get(policy->cpu);
@@ -1588,7 +1624,8 @@ unsigned int cpufreq_get(unsigned int cpu)
if (policy) {
down_read(&policy->rwsem);
- ret_freq = __cpufreq_get(policy);
+ if (cpufreq_driver->get)
+ ret_freq = __cpufreq_get(policy);
up_read(&policy->rwsem);
cpufreq_cpu_put(policy);
@@ -2229,8 +2266,8 @@ EXPORT_SYMBOL(cpufreq_get_policy);
*
* The cpuinfo part of @policy is not updated by this function.
*/
-static int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy)
+int cpufreq_set_policy(struct cpufreq_policy *policy,
+ struct cpufreq_policy *new_policy)
{
struct cpufreq_governor *old_gov;
int ret;
@@ -2337,17 +2374,12 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
*/
void cpufreq_update_policy(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
struct cpufreq_policy new_policy;
if (!policy)
return;
- down_write(&policy->rwsem);
-
- if (policy_is_inactive(policy))
- goto unlock;
-
/*
* BIOS might change freq behind our back
* -> ask driver for current freq and notify governors about a change
@@ -2364,12 +2396,26 @@ void cpufreq_update_policy(unsigned int cpu)
cpufreq_set_policy(policy, &new_policy);
unlock:
- up_write(&policy->rwsem);
-
- cpufreq_cpu_put(policy);
+ cpufreq_cpu_release(policy);
}
EXPORT_SYMBOL(cpufreq_update_policy);
+/**
+ * cpufreq_update_limits - Update policy limits for a given CPU.
+ * @cpu: CPU to update the policy limits for.
+ *
+ * Invoke the driver's ->update_limits callback if present or call
+ * cpufreq_update_policy() for @cpu.
+ */
+void cpufreq_update_limits(unsigned int cpu)
+{
+ if (cpufreq_driver->update_limits)
+ cpufreq_driver->update_limits(cpu);
+ else
+ cpufreq_update_policy(cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_update_limits);
+
/*********************************************************************
* BOOST *
*********************************************************************/
@@ -2426,7 +2472,7 @@ int cpufreq_boost_trigger_state(int state)
static bool cpufreq_boost_supported(void)
{
- return likely(cpufreq_driver) && cpufreq_driver->set_boost;
+ return cpufreq_driver->set_boost;
}
static int create_boost_sysfs_file(void)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index ffa9adeaba31..9d1d9bf02710 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -459,6 +459,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
/* Failure, so roll back. */
pr_err("initialization failed (dbs_data kobject init error %d)\n", ret);
+ kobject_put(&dbs_data->attr_set.kobj);
+
policy->governor_data = NULL;
if (!have_governor_per_policy())
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index e2db5581489a..08b192eb22c6 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
-static DEFINE_SPINLOCK(cpufreq_stats_lock);
struct cpufreq_stats {
unsigned int total_trans;
@@ -23,6 +22,7 @@ struct cpufreq_stats {
unsigned int state_num;
unsigned int last_index;
u64 *time_in_state;
+ spinlock_t lock;
unsigned int *freq_table;
unsigned int *trans_table;
};
@@ -39,12 +39,12 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
{
unsigned int count = stats->max_state;
- spin_lock(&cpufreq_stats_lock);
+ spin_lock(&stats->lock);
memset(stats->time_in_state, 0, count * sizeof(u64));
memset(stats->trans_table, 0, count * count * sizeof(int));
stats->last_time = get_jiffies_64();
stats->total_trans = 0;
- spin_unlock(&cpufreq_stats_lock);
+ spin_unlock(&stats->lock);
}
static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
@@ -62,9 +62,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
if (policy->fast_switch_enabled)
return 0;
- spin_lock(&cpufreq_stats_lock);
+ spin_lock(&stats->lock);
cpufreq_stats_update(stats);
- spin_unlock(&cpufreq_stats_lock);
+ spin_unlock(&stats->lock);
for (i = 0; i < stats->state_num; i++) {
len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
@@ -211,6 +211,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
stats->state_num = i;
stats->last_time = get_jiffies_64();
stats->last_index = freq_table_get_index(stats, policy->cur);
+ spin_lock_init(&stats->lock);
policy->stats = stats;
ret = sysfs_create_group(&policy->kobj, &stats_attr_group);
@@ -242,11 +243,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
if (old_index == -1 || new_index == -1 || old_index == new_index)
return;
- spin_lock(&cpufreq_stats_lock);
+ spin_lock(&stats->lock);
cpufreq_stats_update(stats);
stats->last_index = new_index;
stats->trans_table[old_index * stats->max_state + new_index]++;
stats->total_trans++;
- spin_unlock(&cpufreq_stats_lock);
+ spin_unlock(&stats->lock);
}
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 3a8cc99e6815..e7be0af3199f 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -290,9 +290,6 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_boost_freqs);
struct freq_attr *cpufreq_generic_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
-#ifdef CONFIG_CPU_FREQ_BOOST_SW
- &cpufreq_freq_attr_scaling_boost_freqs,
-#endif
NULL,
};
EXPORT_SYMBOL_GPL(cpufreq_generic_attr);
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index a4ff09f91c8f..3e17560b1efe 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -388,11 +388,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
ret = imx6ul_opp_check_speed_grading(cpu_dev);
if (ret) {
if (ret == -EPROBE_DEFER)
- return ret;
+ goto put_node;
dev_err(cpu_dev, "failed to read ocotp: %d\n",
ret);
- return ret;
+ goto put_node;
}
} else {
imx6q_opp_check_speed_grading(cpu_dev);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2986119dd31f..34b54df41aaa 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -179,6 +179,7 @@ struct vid_data {
* based on the MSR_IA32_MISC_ENABLE value and whether or
* not the maximum reported turbo P-state is different from
* the maximum reported non-turbo one.
+ * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq.
* @min_perf_pct: Minimum capacity limit in percent of the maximum turbo
* P-state capacity.
* @max_perf_pct: Maximum capacity limit in percent of the maximum turbo
@@ -187,6 +188,7 @@ struct vid_data {
struct global_params {
bool no_turbo;
bool turbo_disabled;
+ bool turbo_disabled_mf;
int max_perf_pct;
int min_perf_pct;
};
@@ -525,7 +527,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
u64 epb;
int ret;
- if (!static_cpu_has(X86_FEATURE_EPB))
+ if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
@@ -539,7 +541,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
{
s16 epp;
- if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
/*
* When hwp_req_data is 0, means that caller didn't read
* MSR_HWP_REQUEST, so need to read and get EPP.
@@ -564,7 +566,7 @@ static int intel_pstate_set_epb(int cpu, s16 pref)
u64 epb;
int ret;
- if (!static_cpu_has(X86_FEATURE_EPB))
+ if (!boot_cpu_has(X86_FEATURE_EPB))
return -ENXIO;
ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
@@ -612,7 +614,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
if (epp < 0)
return epp;
- if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
if (epp == HWP_EPP_PERFORMANCE)
return 1;
if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
@@ -621,7 +623,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
return 3;
else
return 4;
- } else if (static_cpu_has(X86_FEATURE_EPB)) {
+ } else if (boot_cpu_has(X86_FEATURE_EPB)) {
/*
* Range:
* 0x00-0x03 : Performance
@@ -649,7 +651,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
mutex_lock(&intel_pstate_limits_lock);
- if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
u64 value;
ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
@@ -824,7 +826,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
epp = cpu_data->epp_powersave;
}
update_epp:
- if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
value &= ~GENMASK_ULL(31, 24);
value |= (u64)epp << 24;
} else {
@@ -849,7 +851,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu)
value |= HWP_MIN_PERF(min_perf);
/* Set EPP/EPB to min */
- if (static_cpu_has(X86_FEATURE_HWP_EPP))
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP))
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
else
intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);
@@ -897,6 +899,48 @@ static void intel_pstate_update_policies(void)
cpufreq_update_policy(cpu);
}
+static void intel_pstate_update_max_freq(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
+ struct cpufreq_policy new_policy;
+ struct cpudata *cpudata;
+
+ if (!policy)
+ return;
+
+ cpudata = all_cpu_data[cpu];
+ policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
+ cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+
+ memcpy(&new_policy, policy, sizeof(*policy));
+ new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq);
+ new_policy.min = min(policy->user_policy.min, new_policy.max);
+
+ cpufreq_set_policy(policy, &new_policy);
+
+ cpufreq_cpu_release(policy);
+}
+
+static void intel_pstate_update_limits(unsigned int cpu)
+{
+ mutex_lock(&intel_pstate_driver_lock);
+
+ update_turbo_state();
+ /*
+ * If turbo has been turned on or off globally, policy limits for
+ * all CPUs need to be updated to reflect that.
+ */
+ if (global.turbo_disabled_mf != global.turbo_disabled) {
+ global.turbo_disabled_mf = global.turbo_disabled;
+ for_each_possible_cpu(cpu)
+ intel_pstate_update_max_freq(cpu);
+ } else {
+ cpufreq_update_policy(cpu);
+ }
+
+ mutex_unlock(&intel_pstate_driver_lock);
+}
+
/************************** sysfs begin ************************/
#define show_one(file_name, object) \
static ssize_t show_##file_name \
@@ -1197,7 +1241,7 @@ static void __init intel_pstate_sysfs_expose_params(void)
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
/* First disable HWP notification interrupt as we don't process them */
- if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
+ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
@@ -2138,6 +2182,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
update_turbo_state();
+ global.turbo_disabled_mf = global.turbo_disabled;
policy->cpuinfo.max_freq = global.turbo_disabled ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
@@ -2182,6 +2227,7 @@ static struct cpufreq_driver intel_pstate = {
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
+ .update_limits = intel_pstate_update_limits,
.name = "intel_pstate",
};
@@ -2316,6 +2362,7 @@ static struct cpufreq_driver intel_cpufreq = {
.init = intel_cpufreq_cpu_init,
.exit = intel_pstate_cpu_exit,
.stop_cpu = intel_cpufreq_stop_cpu,
+ .update_limits = intel_pstate_update_limits,
.name = "intel_cpufreq",
};
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index c2dd43f3f5d8..8d63a6dc8383 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -124,13 +124,14 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk");
if (IS_ERR(priv.cpu_clk)) {
dev_err(priv.dev, "Unable to get cpuclk\n");
- return PTR_ERR(priv.cpu_clk);
+ err = PTR_ERR(priv.cpu_clk);
+ goto out_node;
}
err = clk_prepare_enable(priv.cpu_clk);
if (err) {
dev_err(priv.dev, "Unable to prepare cpuclk\n");
- return err;
+ goto out_node;
}
kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000;
@@ -161,20 +162,22 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
goto out_ddr;
}
- of_node_put(np);
- np = NULL;
-
err = cpufreq_register_driver(&kirkwood_cpufreq_driver);
- if (!err)
- return 0;
+ if (err) {
+ dev_err(priv.dev, "Failed to register cpufreq driver\n");
+ goto out_powersave;
+ }
- dev_err(priv.dev, "Failed to register cpufreq driver\n");
+ of_node_put(np);
+ return 0;
+out_powersave:
clk_disable_unprepare(priv.powersave_clk);
out_ddr:
clk_disable_unprepare(priv.ddr_clk);
out_cpu:
clk_disable_unprepare(priv.cpu_clk);
+out_node:
of_node_put(np);
return err;
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index d9df89392b84..a94355723ef8 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -210,7 +210,7 @@ static int __init maple_cpufreq_init(void)
*/
valp = of_get_property(cpunode, "clock-frequency", NULL);
if (!valp)
- return -ENODEV;
+ goto bail_noprops;
max_freq = (*valp)/1000;
maple_cpu_freqs[0].frequency = max_freq;
maple_cpu_freqs[1].frequency = max_freq/2;
@@ -231,10 +231,6 @@ static int __init maple_cpufreq_init(void)
rc = cpufreq_register_driver(&maple_cpufreq_driver);
- of_node_put(cpunode);
-
- return rc;
-
bail_noprops:
of_node_put(cpunode);
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 75dfbd2a58ea..c7710c149de8 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -146,6 +146,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpu = of_get_cpu_node(policy->cpu, NULL);
+ of_node_put(cpu);
if (!cpu)
goto out;
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 52f0d91d30c1..9b4ce2eb8222 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -552,6 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
if (volt_gpio_np)
voltage_gpio = read_gpio(volt_gpio_np);
+ of_node_put(volt_gpio_np);
if (!voltage_gpio){
pr_err("missing cpu-vcore-select gpio\n");
return 1;
@@ -588,6 +589,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
if (volt_gpio_np)
voltage_gpio = read_gpio(volt_gpio_np);
+ of_node_put(volt_gpio_np);
pvr = mfspr(SPRN_PVR);
has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index fb77b39a4ce3..3c12e03fa343 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1178,7 +1178,7 @@ static int powernowk8_init(void)
unsigned int i, supported_cpus = 0;
int ret;
- if (static_cpu_has(X86_FEATURE_HW_PSTATE)) {
+ if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) {
__request_acpi_cpufreq();
return -ENODEV;
}
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index 41a0f0be3f9f..8414c3a4ea08 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -86,6 +86,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
!cbe_get_cpu_mic_tm_regs(policy->cpu)) {
pr_info("invalid CBE regs pointers for cpufreq\n");
+ of_node_put(cpu);
return -EINVAL;
}
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 4295e5476264..71b640c8c1a5 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -280,10 +280,12 @@ static const struct of_device_id node_matches[] __initconst = {
{ .compatible = "fsl,ls1012a-clockgen", },
{ .compatible = "fsl,ls1021a-clockgen", },
+ { .compatible = "fsl,ls1028a-clockgen", },
{ .compatible = "fsl,ls1043a-clockgen", },
{ .compatible = "fsl,ls1046a-clockgen", },
{ .compatible = "fsl,ls1088a-clockgen", },
{ .compatible = "fsl,ls2080a-clockgen", },
+ { .compatible = "fsl,lx2160a-clockgen", },
{ .compatible = "fsl,p4080-clockgen", },
{ .compatible = "fsl,qoriq-clockgen-1.0", },
{ .compatible = "fsl,qoriq-clockgen-2.0", },
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index a1fb735685db..e086b2dd4072 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -412,7 +412,7 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
}
/**
- * centrino_setpolicy - set a new CPUFreq policy
+ * centrino_target - set a new CPUFreq policy
* @policy: new policy
* @index: index of target frequency
*
diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c
index 0171a6e190d7..f7199a35cbb6 100644
--- a/drivers/cpuidle/cpuidle-exynos.c
+++ b/drivers/cpuidle/cpuidle-exynos.c
@@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = {
[1] = {
.enter = exynos_enter_lowpower,
.exit_latency = 300,
- .target_residency = 100000,
+ .target_residency = 10000,
.name = "C1",
.desc = "ARM power down",
},
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 7f108309e871..0f4b7c45df3e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int index)
{
+ int ret = 0;
+
+ /*
+ * Store the next hrtimer, which becomes either next tick or the next
+ * timer event, whatever expires first. Additionally, to make this data
+ * useful for consumers outside cpuidle, we rely on that the governor's
+ * ->select() callback have decided, whether to stop the tick or not.
+ */
+ WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer());
+
if (cpuidle_state_is_coupled(drv, index))
- return cpuidle_enter_state_coupled(dev, drv, index);
- return cpuidle_enter_state(dev, drv, index);
+ ret = cpuidle_enter_state_coupled(dev, drv, index);
+ else
+ ret = cpuidle_enter_state(dev, drv, index);
+
+ WRITE_ONCE(dev->next_hrtimer, 0);
+ return ret;
}
/**
@@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
{
memset(dev->states_usage, 0, sizeof(dev->states_usage));
dev->last_residency = 0;
+ dev->next_hrtimer = 0;
}
/**
diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index d67242d87744..87e93406d7cd 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -240,7 +240,7 @@ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev,
}
list_for_each_entry(edev, &devfreq_event_list, node) {
- if (!strcmp(edev->desc->name, node->name))
+ if (of_node_name_eq(node, edev->desc->name))
goto out;
}
edev = NULL;
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 0ae3de76833b..6b6991f0e873 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -29,6 +29,9 @@
#include <linux/of.h>
#include "governor.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/devfreq.h>
+
static struct class *devfreq_class;
/*
@@ -228,7 +231,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name)
* if is not found. This can happen when both drivers (the governor driver
* and the driver that call devfreq_add_device) are built as modules.
* devfreq_list_lock should be held by the caller. Returns the matched
- * governor's pointer.
+ * governor's pointer or an error pointer.
*/
static struct devfreq_governor *try_then_request_governor(const char *name)
{
@@ -254,7 +257,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name)
/* Restore previous state before return */
mutex_lock(&devfreq_list_lock);
if (err)
- return NULL;
+ return ERR_PTR(err);
governor = find_devfreq_governor(name);
}
@@ -394,6 +397,8 @@ static void devfreq_monitor(struct work_struct *work)
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
mutex_unlock(&devfreq->lock);
+
+ trace_devfreq_monitor(devfreq);
}
/**
@@ -528,7 +533,7 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay)
mutex_lock(&devfreq->lock);
if (!devfreq->stop_polling)
queue_delayed_work(devfreq_wq, &devfreq->work,
- msecs_to_jiffies(devfreq->profile->polling_ms));
+ msecs_to_jiffies(devfreq->profile->polling_ms));
}
out:
mutex_unlock(&devfreq->lock);
@@ -537,7 +542,7 @@ EXPORT_SYMBOL(devfreq_interval_update);
/**
* devfreq_notifier_call() - Notify that the device frequency requirements
- * has been changed out of devfreq framework.
+ * has been changed out of devfreq framework.
* @nb: the notifier_block (supposed to be devfreq->nb)
* @type: not used
* @devp: not used
@@ -651,7 +656,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
mutex_unlock(&devfreq->lock);
err = set_freq_table(devfreq);
if (err < 0)
- goto err_out;
+ goto err_dev;
mutex_lock(&devfreq->lock);
}
@@ -683,16 +688,27 @@ struct devfreq *devfreq_add_device(struct device *dev,
goto err_out;
}
- devfreq->trans_table =
- devm_kzalloc(&devfreq->dev,
- array3_size(sizeof(unsigned int),
- devfreq->profile->max_state,
- devfreq->profile->max_state),
- GFP_KERNEL);
+ devfreq->trans_table = devm_kzalloc(&devfreq->dev,
+ array3_size(sizeof(unsigned int),
+ devfreq->profile->max_state,
+ devfreq->profile->max_state),
+ GFP_KERNEL);
+ if (!devfreq->trans_table) {
+ mutex_unlock(&devfreq->lock);
+ err = -ENOMEM;
+ goto err_devfreq;
+ }
+
devfreq->time_in_state = devm_kcalloc(&devfreq->dev,
- devfreq->profile->max_state,
- sizeof(unsigned long),
- GFP_KERNEL);
+ devfreq->profile->max_state,
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!devfreq->time_in_state) {
+ mutex_unlock(&devfreq->lock);
+ err = -ENOMEM;
+ goto err_devfreq;
+ }
+
devfreq->last_stat_updated = jiffies;
srcu_init_notifier_head(&devfreq->transition_notifier_list);
@@ -726,7 +742,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
err_init:
mutex_unlock(&devfreq_list_lock);
-
+err_devfreq:
devfreq_remove_device(devfreq);
devfreq = NULL;
err_dev:
@@ -1113,7 +1129,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr,
struct devfreq *df = to_devfreq(dev);
int ret;
char str_governor[DEVFREQ_NAME_LEN + 1];
- struct devfreq_governor *governor;
+ const struct devfreq_governor *governor, *prev_governor;
ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor);
if (ret != 1)
@@ -1142,12 +1158,24 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr,
goto out;
}
}
+ prev_governor = df->governor;
df->governor = governor;
strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN);
ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL);
- if (ret)
+ if (ret) {
dev_warn(dev, "%s: Governor %s not started(%d)\n",
__func__, df->governor->name, ret);
+ df->governor = prev_governor;
+ strncpy(df->governor_name, prev_governor->name,
+ DEVFREQ_NAME_LEN);
+ ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL);
+ if (ret) {
+ dev_err(dev,
+ "%s: reverting to Governor %s failed (%d)\n",
+ __func__, df->governor_name, ret);
+ df->governor = NULL;
+ }
+ }
out:
mutex_unlock(&devfreq_list_lock);
@@ -1172,7 +1200,7 @@ static ssize_t available_governors_show(struct device *d,
*/
if (df->governor->immutable) {
count = scnprintf(&buf[count], DEVFREQ_NAME_LEN,
- "%s ", df->governor_name);
+ "%s ", df->governor_name);
/*
* The devfreq device shows the registered governor except for
* immutable governors such as passive governor .
@@ -1485,8 +1513,8 @@ EXPORT_SYMBOL(devfreq_recommended_opp);
/**
* devfreq_register_opp_notifier() - Helper function to get devfreq notified
- * for any changes in the OPP availability
- * changes
+ * for any changes in the OPP availability
+ * changes
* @dev: The devfreq user device. (parent of devfreq)
* @devfreq: The devfreq object.
*/
@@ -1498,8 +1526,8 @@ EXPORT_SYMBOL(devfreq_register_opp_notifier);
/**
* devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq
- * notified for any changes in the OPP
- * availability changes anymore.
+ * notified for any changes in the OPP
+ * availability changes anymore.
* @dev: The devfreq user device. (parent of devfreq)
* @devfreq: The devfreq object.
*
@@ -1518,8 +1546,8 @@ static void devm_devfreq_opp_release(struct device *dev, void *res)
}
/**
- * devm_ devfreq_register_opp_notifier()
- * - Resource-managed devfreq_register_opp_notifier()
+ * devm_devfreq_register_opp_notifier() - Resource-managed
+ * devfreq_register_opp_notifier()
* @dev: The devfreq user device. (parent of devfreq)
* @devfreq: The devfreq object.
*/
@@ -1547,8 +1575,8 @@ int devm_devfreq_register_opp_notifier(struct device *dev,
EXPORT_SYMBOL(devm_devfreq_register_opp_notifier);
/**
- * devm_devfreq_unregister_opp_notifier()
- * - Resource-managed devfreq_unregister_opp_notifier()
+ * devm_devfreq_unregister_opp_notifier() - Resource-managed
+ * devfreq_unregister_opp_notifier()
* @dev: The devfreq user device. (parent of devfreq)
* @devfreq: The devfreq object.
*/
@@ -1567,8 +1595,8 @@ EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier);
* @list: DEVFREQ_TRANSITION_NOTIFIER.
*/
int devfreq_register_notifier(struct devfreq *devfreq,
- struct notifier_block *nb,
- unsigned int list)
+ struct notifier_block *nb,
+ unsigned int list)
{
int ret = 0;
@@ -1674,9 +1702,9 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier);
* @list: DEVFREQ_TRANSITION_NOTIFIER.
*/
void devm_devfreq_unregister_notifier(struct device *dev,
- struct devfreq *devfreq,
- struct notifier_block *nb,
- unsigned int list)
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
{
WARN_ON(devres_release(dev, devm_devfreq_notifier_release,
devm_devfreq_dev_match, devfreq));
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index c61de0bdf053..c2ea94957501 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -529,7 +529,7 @@ static int of_get_devfreq_events(struct device_node *np,
if (!ppmu_events[i].name)
continue;
- if (!of_node_cmp(node->name, ppmu_events[i].name))
+ if (of_node_name_eq(node, ppmu_events[i].name))
break;
}
diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c
index 22b113363ffc..a436ec4901bb 100644
--- a/drivers/devfreq/event/rockchip-dfi.c
+++ b/drivers/devfreq/event/rockchip-dfi.c
@@ -26,6 +26,8 @@
#include <linux/list.h>
#include <linux/of.h>
+#include <soc/rockchip/rk3399_grf.h>
+
#define RK3399_DMC_NUM_CH 2
/* DDRMON_CTRL */
@@ -43,18 +45,6 @@
#define DDRMON_CH1_COUNT_NUM 0x3c
#define DDRMON_CH1_DFI_ACCESS_NUM 0x40
-/* pmu grf */
-#define PMUGRF_OS_REG2 0x308
-#define DDRTYPE_SHIFT 13
-#define DDRTYPE_MASK 7
-
-enum {
- DDR3 = 3,
- LPDDR3 = 6,
- LPDDR4 = 7,
- UNUSED = 0xFF
-};
-
struct dmc_usage {
u32 access;
u32 total;
@@ -83,16 +73,17 @@ static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev)
u32 ddr_type;
/* get ddr type */
- regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val);
- ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK;
+ regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val);
+ ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) &
+ RK3399_PMUGRF_DDRTYPE_MASK;
/* clear DDRMON_CTRL setting */
writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL);
/* set ddr type to dfi */
- if (ddr_type == LPDDR3)
+ if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3)
writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL);
- else if (ddr_type == LPDDR4)
+ else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4)
writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL);
/* enable count, use software mode */
@@ -211,7 +202,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev)
if (IS_ERR(data->clk)) {
dev_err(dev, "Cannot get the clk dmc_clk\n");
return PTR_ERR(data->clk);
- };
+ }
/* try to find the optional reference to the pmu syscon */
node = of_parse_phandle(np, "rockchip,pmu", 0);
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index c25658b26598..486cc5b422f1 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -514,6 +514,13 @@ err:
return ret;
}
+static void exynos_bus_shutdown(struct platform_device *pdev)
+{
+ struct exynos_bus *bus = dev_get_drvdata(&pdev->dev);
+
+ devfreq_suspend_device(bus->devfreq);
+}
+
#ifdef CONFIG_PM_SLEEP
static int exynos_bus_resume(struct device *dev)
{
@@ -556,6 +563,7 @@ MODULE_DEVICE_TABLE(of, exynos_bus_of_match);
static struct platform_driver exynos_bus_platdrv = {
.probe = exynos_bus_probe,
+ .shutdown = exynos_bus_shutdown,
.driver = {
.name = "exynos-bus",
.pm = &exynos_bus_pm,
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index e795ad2b3f6b..567c034d0301 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -18,14 +18,17 @@
#include <linux/devfreq.h>
#include <linux/devfreq-event.h>
#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>
+#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/rwsem.h>
#include <linux/suspend.h>
+#include <soc/rockchip/rk3399_grf.h>
#include <soc/rockchip/rockchip_sip.h>
struct dram_timing {
@@ -69,8 +72,11 @@ struct rk3399_dmcfreq {
struct mutex lock;
struct dram_timing timing;
struct regulator *vdd_center;
+ struct regmap *regmap_pmu;
unsigned long rate, target_rate;
unsigned long volt, target_volt;
+ unsigned int odt_dis_freq;
+ int odt_pd_arg0, odt_pd_arg1;
};
static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
@@ -80,6 +86,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
struct dev_pm_opp *opp;
unsigned long old_clk_rate = dmcfreq->rate;
unsigned long target_volt, target_rate;
+ struct arm_smccc_res res;
+ bool odt_enable = false;
int err;
opp = devfreq_recommended_opp(dev, freq, flags);
@@ -95,6 +103,19 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
mutex_lock(&dmcfreq->lock);
+ if (target_rate >= dmcfreq->odt_dis_freq)
+ odt_enable = true;
+
+ /*
+ * This makes a SMC call to the TF-A to set the DDR PD (power-down)
+ * timings and to enable or disable the ODT (on-die termination)
+ * resistors.
+ */
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
+ dmcfreq->odt_pd_arg1,
+ ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
+ odt_enable, 0, 0, 0, &res);
+
/*
* If frequency scaling from low to high, adjust voltage first.
* If frequency scaling from high to low, adjust frequency first.
@@ -294,11 +315,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
{
struct arm_smccc_res res;
struct device *dev = &pdev->dev;
- struct device_node *np = pdev->dev.of_node;
+ struct device_node *np = pdev->dev.of_node, *node;
struct rk3399_dmcfreq *data;
int ret, index, size;
uint32_t *timing;
struct dev_pm_opp *opp;
+ u32 ddr_type;
+ u32 val;
data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
if (!data)
@@ -322,7 +345,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
dev_err(dev, "Cannot get the clk dmc_clk\n");
return PTR_ERR(data->dmc_clk);
- };
+ }
data->edev = devfreq_event_get_edev_by_phandle(dev, 0);
if (IS_ERR(data->edev))
@@ -354,11 +377,57 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
}
}
+ node = of_parse_phandle(np, "rockchip,pmu", 0);
+ if (node) {
+ data->regmap_pmu = syscon_node_to_regmap(node);
+ if (IS_ERR(data->regmap_pmu))
+ return PTR_ERR(data->regmap_pmu);
+ }
+
+ regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val);
+ ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) &
+ RK3399_PMUGRF_DDRTYPE_MASK;
+
+ switch (ddr_type) {
+ case RK3399_PMUGRF_DDRTYPE_DDR3:
+ data->odt_dis_freq = data->timing.ddr3_odt_dis_freq;
+ break;
+ case RK3399_PMUGRF_DDRTYPE_LPDDR3:
+ data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq;
+ break;
+ case RK3399_PMUGRF_DDRTYPE_LPDDR4:
+ data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq;
+ break;
+ default:
+ return -EINVAL;
+ };
+
arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
ROCKCHIP_SIP_CONFIG_DRAM_INIT,
0, 0, 0, 0, &res);
/*
+ * In TF-A there is a platform SIP call to set the PD (power-down)
+ * timings and to enable or disable the ODT (on-die termination).
+ * This call needs three arguments as follows:
+ *
+ * arg0:
+ * bit[0-7] : sr_idle
+ * bit[8-15] : sr_mc_gate_idle
+ * bit[16-31] : standby idle
+ * arg1:
+ * bit[0-11] : pd_idle
+ * bit[16-27] : srpd_lite_idle
+ * arg2:
+ * bit[0] : odt enable
+ */
+ data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) |
+ ((data->timing.sr_mc_gate_idle & 0xff) << 8) |
+ ((data->timing.standby_idle & 0xffff) << 16);
+ data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) |
+ ((data->timing.srpd_lite_idle & 0xfff) << 16);
+
+ /*
* We add a devfreq driver to our parent since it has a device tree node
* with operating points.
*/
diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index c59d2eee5d30..c89ba7b834ff 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -573,10 +573,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
static int tegra_governor_event_handler(struct devfreq *devfreq,
unsigned int event, void *data)
{
- struct tegra_devfreq *tegra;
- int ret = 0;
-
- tegra = dev_get_drvdata(devfreq->dev.parent);
+ struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent);
switch (event) {
case DEVFREQ_GOV_START:
@@ -600,7 +597,7 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
break;
}
- return ret;
+ return 0;
}
static struct devfreq_governor tegra_devfreq_governor = {
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index cac16c4b0df3..7b655f6156fb 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -5,20 +5,6 @@
menu "Firmware Drivers"
-config ARM_PSCI_FW
- bool
-
-config ARM_PSCI_CHECKER
- bool "ARM PSCI checker"
- depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST
- help
- Run the PSCI checker during startup. This checks that hotplug and
- suspend operations work correctly when using PSCI.
-
- The torture tests may interfere with the PSCI checker by turning CPUs
- on and off through hotplug, so for now torture tests and PSCI checker
- are mutually exclusive.
-
config ARM_SCMI_PROTOCOL
bool "ARM System Control and Management Interface (SCMI) Message Protocol"
depends on ARM || ARM64 || COMPILE_TEST
@@ -270,6 +256,7 @@ config TI_SCI_PROTOCOL
config HAVE_ARM_SMCCC
bool
+source "drivers/firmware/psci/Kconfig"
source "drivers/firmware/broadcom/Kconfig"
source "drivers/firmware/google/Kconfig"
source "drivers/firmware/efi/Kconfig"
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 80feb635120f..9a3909a22682 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -2,8 +2,6 @@
#
# Makefile for the linux kernel.
#
-obj-$(CONFIG_ARM_PSCI_FW) += psci.o
-obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o
obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o
obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o
@@ -25,6 +23,7 @@ CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQU
obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o
obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/
+obj-y += psci/
obj-y += broadcom/
obj-y += meson/
obj-$(CONFIG_GOOGLE_FIRMWARE) += google/
diff --git a/drivers/firmware/psci/Kconfig b/drivers/firmware/psci/Kconfig
new file mode 100644
index 000000000000..26a3b32bf7ab
--- /dev/null
+++ b/drivers/firmware/psci/Kconfig
@@ -0,0 +1,13 @@
+config ARM_PSCI_FW
+ bool
+
+config ARM_PSCI_CHECKER
+ bool "ARM PSCI checker"
+ depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST
+ help
+ Run the PSCI checker during startup. This checks that hotplug and
+ suspend operations work correctly when using PSCI.
+
+ The torture tests may interfere with the PSCI checker by turning CPUs
+ on and off through hotplug, so for now torture tests and PSCI checker
+ are mutually exclusive.
diff --git a/drivers/firmware/psci/Makefile b/drivers/firmware/psci/Makefile
new file mode 100644
index 000000000000..1956b882470f
--- /dev/null
+++ b/drivers/firmware/psci/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+obj-$(CONFIG_ARM_PSCI_FW) += psci.o
+obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci/psci.c
index c80ec1d03274..fe090ef43d28 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -88,6 +88,7 @@ static u32 psci_function_id[PSCI_FN_MAX];
PSCI_1_0_EXT_POWER_STATE_TYPE_MASK)
static u32 psci_cpu_suspend_feature;
+static bool psci_system_reset2_supported;
static inline bool psci_has_ext_power_state(void)
{
@@ -95,6 +96,11 @@ static inline bool psci_has_ext_power_state(void)
PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK;
}
+static inline bool psci_has_osi_support(void)
+{
+ return psci_cpu_suspend_feature & PSCI_1_0_OS_INITIATED;
+}
+
static inline bool psci_power_state_loses_context(u32 state)
{
const u32 mask = psci_has_ext_power_state() ?
@@ -253,7 +259,17 @@ static int get_set_conduit_method(struct device_node *np)
static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
{
- invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+ if ((reboot_mode == REBOOT_WARM || reboot_mode == REBOOT_SOFT) &&
+ psci_system_reset2_supported) {
+ /*
+ * reset_type[31] = 0 (architectural)
+ * reset_type[30:0] = 0 (SYSTEM_WARM_RESET)
+ * cookie = 0 (ignored by the implementation)
+ */
+ invoke_psci_fn(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2), 0, 0, 0);
+ } else {
+ invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+ }
}
static void psci_sys_poweroff(void)
@@ -270,9 +286,26 @@ static int __init psci_features(u32 psci_func_id)
#ifdef CONFIG_CPU_IDLE
static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
+static int psci_dt_parse_state_node(struct device_node *np, u32 *state)
+{
+ int err = of_property_read_u32(np, "arm,psci-suspend-param", state);
+
+ if (err) {
+ pr_warn("%pOF missing arm,psci-suspend-param property\n", np);
+ return err;
+ }
+
+ if (!psci_power_state_is_valid(*state)) {
+ pr_warn("Invalid PSCI power state %#x\n", *state);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu)
{
- int i, ret, count = 0;
+ int i, ret = 0, count = 0;
u32 *psci_states;
struct device_node *state_node;
@@ -291,29 +324,16 @@ static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu)
return -ENOMEM;
for (i = 0; i < count; i++) {
- u32 state;
-
state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+ ret = psci_dt_parse_state_node(state_node, &psci_states[i]);
+ of_node_put(state_node);
- ret = of_property_read_u32(state_node,
- "arm,psci-suspend-param",
- &state);
- if (ret) {
- pr_warn(" * %pOF missing arm,psci-suspend-param property\n",
- state_node);
- of_node_put(state_node);
+ if (ret)
goto free_mem;
- }
- of_node_put(state_node);
- pr_debug("psci-power-state %#x index %d\n", state, i);
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- ret = -EINVAL;
- goto free_mem;
- }
- psci_states[i] = state;
+ pr_debug("psci-power-state %#x index %d\n", psci_states[i], i);
}
+
/* Idle states parsed correctly, initialize per-cpu pointer */
per_cpu(psci_power_state, cpu) = psci_states;
return 0;
@@ -451,6 +471,16 @@ static const struct platform_suspend_ops psci_suspend_ops = {
.enter = psci_system_suspend_enter,
};
+static void __init psci_init_system_reset2(void)
+{
+ int ret;
+
+ ret = psci_features(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2));
+
+ if (ret != PSCI_RET_NOT_SUPPORTED)
+ psci_system_reset2_supported = true;
+}
+
static void __init psci_init_system_suspend(void)
{
int ret;
@@ -588,6 +618,7 @@ static int __init psci_probe(void)
psci_init_smccc();
psci_init_cpu_suspend();
psci_init_system_suspend();
+ psci_init_system_reset2();
}
return 0;
@@ -605,9 +636,9 @@ static int __init psci_0_2_init(struct device_node *np)
int err;
err = get_set_conduit_method(np);
-
if (err)
- goto out_put_node;
+ return err;
+
/*
* Starting with v0.2, the PSCI specification introduced a call
* (PSCI_VERSION) that allows probing the firmware version, so
@@ -615,11 +646,7 @@ static int __init psci_0_2_init(struct device_node *np)
* can be carried out according to the specific version reported
* by firmware
*/
- err = psci_probe();
-
-out_put_node:
- of_node_put(np);
- return err;
+ return psci_probe();
}
/*
@@ -631,9 +658,8 @@ static int __init psci_0_1_init(struct device_node *np)
int err;
err = get_set_conduit_method(np);
-
if (err)
- goto out_put_node;
+ return err;
pr_info("Using PSCI v0.1 Function IDs from DT\n");
@@ -657,15 +683,27 @@ static int __init psci_0_1_init(struct device_node *np)
psci_ops.migrate = psci_migrate;
}
-out_put_node:
- of_node_put(np);
- return err;
+ return 0;
+}
+
+static int __init psci_1_0_init(struct device_node *np)
+{
+ int err;
+
+ err = psci_0_2_init(np);
+ if (err)
+ return err;
+
+ if (psci_has_osi_support())
+ pr_info("OSI mode supported.\n");
+
+ return 0;
}
static const struct of_device_id psci_of_match[] __initconst = {
{ .compatible = "arm,psci", .data = psci_0_1_init},
{ .compatible = "arm,psci-0.2", .data = psci_0_2_init},
- { .compatible = "arm,psci-1.0", .data = psci_0_2_init},
+ { .compatible = "arm,psci-1.0", .data = psci_1_0_init},
{},
};
@@ -674,6 +712,7 @@ int __init psci_dt_init(void)
struct device_node *np;
const struct of_device_id *matched_np;
psci_initcall_t init_fn;
+ int ret;
np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
@@ -681,7 +720,10 @@ int __init psci_dt_init(void)
return -ENODEV;
init_fn = (psci_initcall_t)matched_np->data;
- return init_fn(np);
+ ret = init_fn(np);
+
+ of_node_put(np);
+ return ret;
}
#ifdef CONFIG_ACPI
diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci/psci_checker.c
index 346943657962..346943657962 100644
--- a/drivers/firmware/psci_checker.c
+++ b/drivers/firmware/psci/psci_checker.c
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 0420f7e8ad5b..0e7703fe733f 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -526,6 +526,60 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
+/**
+ * dev_pm_opp_find_freq_ceil_by_volt() - Find OPP with highest frequency for
+ * target voltage.
+ * @dev: Device for which we do this operation.
+ * @u_volt: Target voltage.
+ *
+ * Search for OPP with highest (ceil) frequency and has voltage <= u_volt.
+ *
+ * Return: matching *opp, else returns ERR_PTR in case of error which should be
+ * handled using IS_ERR.
+ *
+ * Error return values can be:
+ * EINVAL: bad parameters
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev,
+ unsigned long u_volt)
+{
+ struct opp_table *opp_table;
+ struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+ if (!dev || !u_volt) {
+ dev_err(dev, "%s: Invalid argument volt=%lu\n", __func__,
+ u_volt);
+ return ERR_PTR(-EINVAL);
+ }
+
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table))
+ return ERR_CAST(opp_table);
+
+ mutex_lock(&opp_table->lock);
+
+ list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
+ if (temp_opp->available) {
+ if (temp_opp->supplies[0].u_volt > u_volt)
+ break;
+ opp = temp_opp;
+ }
+ }
+
+ /* Increment the reference count of OPP */
+ if (!IS_ERR(opp))
+ dev_pm_opp_get(opp);
+
+ mutex_unlock(&opp_table->lock);
+ dev_pm_opp_put_opp_table(opp_table);
+
+ return opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt);
+
static int _set_opp_voltage(struct device *dev, struct regulator *reg,
struct dev_pm_opp_supply *supply)
{
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index b160e98076e3..684caf067003 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -178,6 +178,11 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
#endif
+static inline bool policy_is_inactive(struct cpufreq_policy *policy)
+{
+ return cpumask_empty(policy->cpus);
+}
+
static inline bool policy_is_shared(struct cpufreq_policy *policy)
{
return cpumask_weight(policy->cpus) > 1;
@@ -193,8 +198,14 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu);
void disable_cpufreq(void);
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
+
+struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu);
+void cpufreq_cpu_release(struct cpufreq_policy *policy);
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
+int cpufreq_set_policy(struct cpufreq_policy *policy,
+ struct cpufreq_policy *new_policy);
void cpufreq_update_policy(unsigned int cpu);
+void cpufreq_update_limits(unsigned int cpu);
bool have_governor_per_policy(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
@@ -322,6 +333,9 @@ struct cpufreq_driver {
/* should be defined, if possible */
unsigned int (*get)(unsigned int cpu);
+ /* Called to update policy limits on firmware notifications. */
+ void (*update_limits)(unsigned int cpu);
+
/* optional */
int (*bios_limit)(int cpu, unsigned int *limit);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index e78281d07b70..dbfdd0fadbef 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -147,6 +147,7 @@ enum cpuhp_state {
CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
+ CPUHP_AP_X86_INTEL_EPB_ONLINE,
CPUHP_AP_PERF_ONLINE,
CPUHP_AP_PERF_X86_ONLINE,
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3b39472324a3..bb9a0db89f1a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -83,6 +83,7 @@ struct cpuidle_device {
unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1;
unsigned int cpu;
+ ktime_t next_hrtimer;
int last_residency;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 1ed5874bcee0..0e8e356bed6a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
+#include <linux/cpumask.h>
/*
* Flags to control the behaviour of a genpd.
@@ -42,11 +43,22 @@
* GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered
* on, in case any of its attached devices is used
* in the wakeup path to serve system wakeups.
+ *
+ * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get
+ * devices attached, which may belong to CPUs or
+ * possibly have subdomains with CPUs attached.
+ * This flag enables the genpd backend driver to
+ * deploy idle power management support for CPUs
+ * and groups of CPUs. Note that, the backend
+ * driver must then comply with the so called,
+ * last-man-standing algorithm, for the CPUs in the
+ * PM domain.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
#define GENPD_FLAG_ALWAYS_ON (1U << 2)
#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3)
+#define GENPD_FLAG_CPU_DOMAIN (1U << 4)
enum gpd_status {
GPD_STATE_ACTIVE = 0, /* PM domain is active */
@@ -69,6 +81,7 @@ struct genpd_power_state {
s64 residency_ns;
struct fwnode_handle *fwnode;
ktime_t idle_time;
+ void *data;
};
struct genpd_lock_ops;
@@ -93,6 +106,7 @@ struct generic_pm_domain {
unsigned int suspended_count; /* System suspend device counter */
unsigned int prepared_count; /* Suspend counter of prepared devices */
unsigned int performance_state; /* Aggregated max performance state */
+ cpumask_var_t cpus; /* A cpumask of the attached CPUs */
int (*power_off)(struct generic_pm_domain *domain);
int (*power_on)(struct generic_pm_domain *domain);
struct opp_table *opp_table; /* OPP table of the genpd */
@@ -104,15 +118,17 @@ struct generic_pm_domain {
s64 max_off_time_ns; /* Maximum allowed "suspended" time. */
bool max_off_time_changed;
bool cached_power_down_ok;
+ bool cached_power_down_state_idx;
int (*attach_dev)(struct generic_pm_domain *domain,
struct device *dev);
void (*detach_dev)(struct generic_pm_domain *domain,
struct device *dev);
unsigned int flags; /* Bit field of configs for genpd */
struct genpd_power_state *states;
+ void (*free_states)(struct genpd_power_state *states,
+ unsigned int state_count);
unsigned int state_count; /* number of states */
unsigned int state_idx; /* state that genpd will go to when off */
- void *free; /* Free the state that was allocated for default */
ktime_t on_time;
ktime_t accounting_time;
const struct genpd_lock_ops *lock_ops;
@@ -159,6 +175,7 @@ struct generic_pm_domain_data {
struct pm_domain_data base;
struct gpd_timing_data td;
struct notifier_block nb;
+ int cpu;
unsigned int performance_state;
void *data;
};
@@ -187,6 +204,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
extern struct dev_power_governor simple_qos_governor;
extern struct dev_power_governor pm_domain_always_on_gov;
+#ifdef CONFIG_CPU_IDLE
+extern struct dev_power_governor pm_domain_cpu_gov;
+#endif
#else
static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 24c757a32a7b..b150fe97ce5a 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -102,6 +102,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
unsigned long *freq);
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev,
+ unsigned long u_volt);
struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
unsigned long *freq);
@@ -207,6 +209,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
return ERR_PTR(-ENOTSUPP);
}
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev,
+ unsigned long u_volt)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
unsigned long *freq)
{
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 3f529ad9a9d2..6b3ea9ea6a9e 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -425,6 +425,7 @@ void restore_processor_state(void);
/* kernel/power/main.c */
extern int register_pm_notifier(struct notifier_block *nb);
extern int unregister_pm_notifier(struct notifier_block *nb);
+extern void ksys_sync_helper(void);
#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
@@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
return 0;
}
+static inline void ksys_sync_helper(void) {}
+
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
static inline bool pm_wakeup_pending(void) { return false; }
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 76acb48acdb7..f92a10b5e112 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -128,6 +128,7 @@ extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_next_hrtimer(void);
extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
@@ -151,7 +152,11 @@ static inline void tick_nohz_idle_restart_tick(void) { }
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
static inline bool tick_nohz_idle_got_tick(void) { return false; }
-
+static inline ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ /* Next wake up is the tick period, assume it starts now */
+ return ktime_add(ktime_get(), TICK_NSEC);
+}
static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
*delta_next = TICK_NSEC;
diff --git a/include/soc/rockchip/rk3399_grf.h b/include/soc/rockchip/rk3399_grf.h
new file mode 100644
index 000000000000..3eebabcb2812
--- /dev/null
+++ b/include/soc/rockchip/rk3399_grf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Rockchip General Register Files definitions
+ *
+ * Copyright (c) 2018, Collabora Ltd.
+ * Author: Enric Balletbo i Serra <enric.balletbo@collabora.com>
+ */
+
+#ifndef __SOC_RK3399_GRF_H
+#define __SOC_RK3399_GRF_H
+
+/* PMU GRF Registers */
+#define RK3399_PMUGRF_OS_REG2 0x308
+#define RK3399_PMUGRF_DDRTYPE_SHIFT 13
+#define RK3399_PMUGRF_DDRTYPE_MASK 7
+#define RK3399_PMUGRF_DDRTYPE_DDR3 3
+#define RK3399_PMUGRF_DDRTYPE_LPDDR2 5
+#define RK3399_PMUGRF_DDRTYPE_LPDDR3 6
+#define RK3399_PMUGRF_DDRTYPE_LPDDR4 7
+
+#endif
diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h
index 7e28092c4d3d..ad9482c56797 100644
--- a/include/soc/rockchip/rockchip_sip.h
+++ b/include/soc/rockchip/rockchip_sip.h
@@ -23,5 +23,6 @@
#define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE 0x05
#define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ 0x06
#define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM 0x07
+#define ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD 0x08
#endif
diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h
new file mode 100644
index 000000000000..cf5b8772175d
--- /dev/null
+++ b/include/trace/events/devfreq.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM devfreq
+
+#if !defined(_TRACE_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DEVFREQ_H
+
+#include <linux/devfreq.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(devfreq_monitor,
+ TP_PROTO(struct devfreq *devfreq),
+
+ TP_ARGS(devfreq),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, freq)
+ __field(unsigned long, busy_time)
+ __field(unsigned long, total_time)
+ __field(unsigned int, polling_ms)
+ __string(dev_name, dev_name(&devfreq->dev))
+ ),
+
+ TP_fast_assign(
+ __entry->freq = devfreq->previous_freq;
+ __entry->busy_time = devfreq->last_status.busy_time;
+ __entry->total_time = devfreq->last_status.total_time;
+ __entry->polling_ms = devfreq->profile->polling_ms;
+ __assign_str(dev_name, dev_name(&devfreq->dev));
+ ),
+
+ TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu",
+ __get_str(dev_name), __entry->freq, __entry->polling_ms,
+ __entry->total_time == 0 ? 0 :
+ (100 * __entry->busy_time) / __entry->total_time)
+);
+#endif /* _TRACE_DEVFREQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
index b3bcabe380da..2fcad1dd0b0e 100644
--- a/include/uapi/linux/psci.h
+++ b/include/uapi/linux/psci.h
@@ -49,8 +49,11 @@
#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10)
#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14)
+#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15)
+#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14)
+#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
@@ -97,6 +100,10 @@
#define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \
(0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT)
+#define PSCI_1_0_OS_INITIATED BIT(0)
+#define PSCI_1_0_SUSPEND_MODE_PC 0
+#define PSCI_1_0_SUSPEND_MODE_OSI 1
+
/* PSCI return values (inclusive of all PSCI versions) */
#define PSCI_RET_SUCCESS 0
#define PSCI_RET_NOT_SUPPORTED -1
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index cfc7a57049e4..c8c272df7154 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -14,7 +14,6 @@
#include <linux/export.h>
#include <linux/suspend.h>
-#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
@@ -709,9 +708,7 @@ int hibernate(void)
goto Exit;
}
- pr_info("Syncing filesystems ... \n");
- ksys_sync();
- pr_info("done.\n");
+ ksys_sync_helper();
error = freeze_processes();
if (error)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 98e76cad128b..4f43e724f6eb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -16,6 +16,7 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/suspend.h>
+#include <linux/syscalls.h>
#include "power.h"
@@ -51,6 +52,19 @@ void unlock_system_sleep(void)
}
EXPORT_SYMBOL_GPL(unlock_system_sleep);
+void ksys_sync_helper(void)
+{
+ ktime_t start;
+ long elapsed_msecs;
+
+ start = ktime_get();
+ ksys_sync();
+ elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start));
+ pr_info("Filesystems sync: %ld.%03ld seconds\n",
+ elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC);
+}
+EXPORT_SYMBOL_GPL(ksys_sync_helper);
+
/* Routines for PM-transition notifications */
static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 59b6def23046..ef908c134b34 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -17,7 +17,6 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
-#include <linux/syscalls.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state)
if (state == PM_SUSPEND_TO_IDLE)
s2idle_begin();
-#ifndef CONFIG_SUSPEND_SKIP_SYNC
- trace_suspend_resume(TPS("sync_filesystems"), 0, true);
- pr_info("Syncing filesystems ... ");
- ksys_sync();
- pr_cont("done.\n");
- trace_suspend_resume(TPS("sync_filesystems"), 0, false);
-#endif
+ if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) {
+ trace_suspend_resume(TPS("sync_filesystems"), 0, true);
+ ksys_sync_helper();
+ trace_suspend_resume(TPS("sync_filesystems"), 0, false);
+ }
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 2d8b60a3c86b..cb24e840a3e6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -10,7 +10,6 @@
*/
#include <linux/suspend.h>
-#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
@@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
if (data->frozen)
break;
- printk("Syncing filesystems ... ");
- ksys_sync();
- printk("done.\n");
+ ksys_sync_helper();
error = freeze_processes();
if (error)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3638d2377e3c..5403479073b0 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -13,6 +13,8 @@
#include <linux/sched/cpufreq.h>
#include <trace/events/power.h>
+#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
+
struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int rate_limit_us;
@@ -51,7 +53,6 @@ struct sugov_cpu {
u64 last_update;
unsigned long bw_dl;
- unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -291,8 +292,8 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
*
* The IO wait boost of a task is disabled after a tick since the last update
* of a CPU. If a new IO wait boost is requested after more then a tick, then
- * we enable the boost starting from the minimum frequency, which improves
- * energy efficiency by ignoring sporadic wakeups from IO.
+ * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
+ * efficiency by ignoring sporadic wakeups from IO.
*/
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
bool set_iowait_boost)
@@ -303,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -317,8 +318,9 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
*
* Each time a task wakes up after an IO operation, the CPU utilization can be
* boosted to a certain utilization which doubles at each "frequent and
- * successive" wakeup from IO, ranging from the utilization of the minimum
- * OPP to the utilization of the maximum OPP.
+ * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
+ * of the maximum OPP.
+ *
* To keep doubling, an IO boost has to be requested at least once per tick,
* otherwise we restart from the utilization of the minimum OPP.
*/
@@ -349,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->min;
+ sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}
/**
@@ -389,7 +391,7 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
* No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->min) {
+ if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
sg_cpu->iowait_boost = 0;
return util;
}
@@ -827,9 +829,6 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->min =
- (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
- policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index bdf00c763ee3..f4ee1a3428ae 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1038,6 +1038,18 @@ bool tick_nohz_idle_got_tick(void)
}
/**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whatever that expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*