-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu2
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/multihit.rst163
-rw-r--r--Documentation/admin-guide/hw-vuln/tsx_async_abort.rst276
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt92
-rw-r--r--Documentation/devicetree/bindings/arm/omap/omap.txt30
-rw-r--r--Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt6
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt26
-rw-r--r--Documentation/devicetree/bindings/devfreq/exynos-bus.txt2
-rw-r--r--Documentation/power/drivers-testing.rst7
-rw-r--r--Documentation/power/freezing-of-tasks.rst37
-rw-r--r--Documentation/power/opp.rst32
-rw-r--r--Documentation/power/pci.rst28
-rw-r--r--Documentation/power/pm_qos_interface.rst26
-rw-r--r--Documentation/power/runtime_pm.rst4
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.rst7
-rw-r--r--Documentation/power/swsusp.rst14
-rw-r--r--Documentation/x86/index.rst1
-rw-r--r--Documentation/x86/tsx_async_abort.rst117
-rw-r--r--MAINTAINERS37
-rw-r--r--Makefile5
-rw-r--r--arch/arm/boot/dts/am3517.dtsi31
-rw-r--r--arch/arm/boot/dts/am3517_mt_ventoux.dts2
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts2
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts2
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts2
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts2
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3530.dts2
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3730.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000-lcd43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000-lcd70.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000.dts2
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-ha-lcd.dts2
-rw-r--r--arch/arm/boot/dts/omap3-ha.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0020-rev-f.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0020.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0030-rev-g.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0030.dts2
-rw-r--r--arch/arm/boot/dts/omap3-ldp.dts2
-rw-r--r--arch/arm/boot/dts/omap3-lilly-a83x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-lilly-dbb056.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n9.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n950-n9.dtsi7
-rw-r--r--arch/arm/boot/dts/omap3-n950.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-alto35.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-gallop43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-palo35.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-palo43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-summit.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-tobi.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts2
-rw-r--r--arch/arm/boot/dts/omap3-pandora-1ghz.dts2
-rw-r--r--arch/arm/boot/dts/omap3-sbc-t3530.dts2
-rw-r--r--arch/arm/boot/dts/omap3-sbc-t3730.dts2
-rw-r--r--arch/arm/boot/dts/omap3-sniper.dts2
-rw-r--r--arch/arm/boot/dts/omap3-thunder.dts2
-rw-r--r--arch/arm/boot/dts/omap3-zoom3.dts2
-rw-r--r--arch/arm/boot/dts/omap3430-sdp.dts2
-rw-r--r--arch/arm/boot/dts/omap34xx.dtsi66
-rw-r--r--arch/arm/boot/dts/omap36xx.dtsi65
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6q.c4
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra20.c2
-rw-r--r--arch/mips/sgi-ip27/Kconfig7
-rw-r--r--arch/mips/sgi-ip27/ip27-init.c21
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c4
-rw-r--r--arch/sparc/vdso/Makefile4
-rw-r--r--arch/x86/Kconfig45
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/nospec-branch.h4
-rw-r--r--arch/x86/include/asm/processor.h7
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/bugs.c159
-rw-r--r--arch/x86/kernel/cpu/common.c99
-rw-r--r--arch/x86/kernel/cpu/cpu.h18
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c4
-rw-r--r--arch/x86/kernel/cpu/tsx.c140
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kvm/mmu.c282
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h29
-rw-r--r--arch/x86/kvm/vmx/vmx.c23
-rw-r--r--arch/x86/kvm/vmx/vmx.h11
-rw-r--r--arch/x86/kvm/x86.c99
-rw-r--r--block/bfq-iosched.c32
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-iocost.c8
-rw-r--r--drivers/acpi/processor_idle.c21
-rw-r--r--drivers/base/cpu.c17
-rw-r--r--drivers/base/memory.c36
-rw-r--r--drivers/base/power/common.c20
-rw-r--r--drivers/base/power/domain.c40
-rw-r--r--drivers/base/power/power.h30
-rw-r--r--drivers/base/power/wakeirq.c4
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/rsxx/core.c2
-rw-r--r--drivers/char/hw_random/core.c5
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/cpufreq/Kconfig.arm12
-rw-r--r--drivers/cpufreq/Makefile2
-rw-r--r--drivers/cpufreq/arm_big_little.c658
-rw-r--r--drivers/cpufreq/arm_big_little.h43
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c2
-rw-r--r--drivers/cpufreq/cpufreq.c18
-rw-r--r--drivers/cpufreq/imx-cpufreq-dt.c20
-rw-r--r--drivers/cpufreq/intel_pstate.c30
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c17
-rw-r--r--drivers/cpufreq/s3c64xx-cpufreq.c7
-rw-r--r--drivers/cpufreq/scpi-cpufreq.c2
-rw-r--r--drivers/cpufreq/sun50i-cpufreq-nvmem.c25
-rw-r--r--drivers/cpufreq/ti-cpufreq.c119
-rw-r--r--drivers/cpufreq/vexpress-spc-cpufreq.c584
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c7
-rw-r--r--drivers/cpuidle/cpuidle.c72
-rw-r--r--drivers/cpuidle/driver.c72
-rw-r--r--drivers/cpuidle/governor.c7
-rw-r--r--drivers/cpuidle/governors/haltpoll.c7
-rw-r--r--drivers/cpuidle/governors/ladder.c29
-rw-r--r--drivers/cpuidle/governors/menu.c131
-rw-r--r--drivers/cpuidle/governors/teo.c182
-rw-r--r--drivers/cpuidle/poll_state.c2
-rw-r--r--drivers/cpuidle/sysfs.c71
-rw-r--r--drivers/devfreq/devfreq.c33
-rw-r--r--drivers/devfreq/event/exynos-ppmu.c1
-rw-r--r--drivers/devfreq/governor.h3
-rw-r--r--drivers/devfreq/tegra30-devfreq.c417
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c38
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c111
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c4
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c435
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c7
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h31
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c16
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h10
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c122
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h3
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c2
-rw-r--r--drivers/i2c/i2c-core-acpi.c28
-rw-r--r--drivers/i2c/i2c-core-of.c4
-rw-r--r--drivers/infiniband/hw/hfi1/init.c1
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c4
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c16
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c57
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c2
-rw-r--r--drivers/input/ff-memless.c9
-rw-r--r--drivers/input/mouse/synaptics.c1
-rw-r--r--drivers/input/rmi4/rmi_f11.c9
-rw-r--r--drivers/input/rmi4/rmi_f12.c32
-rw-r--r--drivers/input/rmi4/rmi_f54.c5
-rw-r--r--drivers/input/touchscreen/cyttsp4_core.c7
-rw-r--r--drivers/mmc/host/sdhci-of-at91.c2
-rw-r--r--drivers/mmc/host/tmio_mmc.h1
-rw-r--r--drivers/mmc/host/tmio_mmc_core.c10
-rw-r--r--drivers/net/can/slcan.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.c13
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c4
-rw-r--r--drivers/net/ethernet/cirrus/ep93xx_eth.c5
-rw-r--r--drivers/net/ethernet/cortina/gemini.c1
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c10
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c19
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c16
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/common.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c5
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ptp.c4
-rw-r--r--drivers/net/ethernet/renesas/ravb.h3
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c26
-rw-r--r--drivers/net/ethernet/renesas/ravb_ptp.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c4
-rw-r--r--drivers/net/netdevsim/dev.c2
-rw-r--r--drivers/net/phy/dp83640.c16
-rw-r--r--drivers/net/phy/mdio_bus.c11
-rw-r--r--drivers/net/slip/slip.c1
-rw-r--r--drivers/net/usb/ax88172a.c2
-rw-r--r--drivers/net/usb/cdc_ncm.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c20
-rw-r--r--drivers/nfc/nxp-nci/i2c.c6
-rw-r--r--drivers/opp/core.c69
-rw-r--r--drivers/powercap/intel_rapl_common.c2
-rw-r--r--drivers/ptp/ptp_chardev.c20
-rw-r--r--drivers/scsi/qla2xxx/qla_mid.c8
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c8
-rw-r--r--drivers/scsi/scsi_lib.c3
-rw-r--r--drivers/scsi/sd_zbc.c29
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/vboxsf/Kconfig10
-rw-r--r--drivers/staging/vboxsf/Makefile5
-rw-r--r--drivers/staging/vboxsf/TODO7
-rw-r--r--drivers/staging/vboxsf/dir.c418
-rw-r--r--drivers/staging/vboxsf/file.c370
-rw-r--r--drivers/staging/vboxsf/shfl_hostintf.h901
-rw-r--r--drivers/staging/vboxsf/super.c501
-rw-r--r--drivers/staging/vboxsf/utils.c551
-rw-r--r--drivers/staging/vboxsf/vboxsf_wrappers.c371
-rw-r--r--drivers/staging/vboxsf/vfsmod.h137
-rw-r--r--fs/afs/dir.c7
-rw-r--r--fs/aio.c10
-rw-r--r--fs/autofs/expire.c5
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/ceph/file.c29
-rw-r--r--fs/ecryptfs/inode.c84
-rw-r--r--fs/exportfs/expfs.c31
-rw-r--r--fs/io_uring.c32
-rw-r--r--fs/namespace.c15
-rw-r--r--include/dt-bindings/pmu/exynos_ppmu.h25
-rw-r--r--include/linux/can/core.h1
-rw-r--r--include/linux/cpu.h37
-rw-r--r--include/linux/cpuidle.h27
-rw-r--r--include/linux/intel-iommu.h6
-rw-r--r--include/linux/kvm_host.h7
-rw-r--r--include/linux/memory.h1
-rw-r--r--include/linux/pm.h2
-rw-r--r--include/linux/pm_domain.h5
-rw-r--r--include/linux/pm_opp.h13
-rw-r--r--include/net/devlink.h5
-rw-r--r--include/trace/events/tcp.h2
-rw-r--r--include/uapi/linux/devlink.h1
-rw-r--r--include/uapi/linux/ptp_clock.h5
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--kernel/cpu.c27
-rw-r--r--kernel/events/core.c23
-rw-r--r--kernel/power/qos.c8
-rw-r--r--kernel/power/snapshot.c9
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/sched/fair.c29
-rw-r--r--kernel/sched/idle.c24
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--lib/xz/xz_dec_lzma2.c1
-rw-r--r--mm/debug.c31
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/khugepaged.c28
-rw-r--r--mm/madvise.c16
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory_hotplug.c43
-rw-r--r--mm/mempolicy.c14
-rw-r--r--mm/page_io.c6
-rw-r--r--mm/slub.c39
-rw-r--r--net/can/af_can.c3
-rw-r--r--net/can/j1939/main.c9
-rw-r--r--net/can/j1939/socket.c94
-rw-r--r--net/can/j1939/transport.c36
-rw-r--r--net/core/devlink.c45
-rw-r--r--net/dsa/tag_8021q.c2
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv6/seg6_local.c11
-rw-r--r--net/rds/ib_cm.c23
-rw-r--r--net/smc/af_smc.c3
-rw-r--r--net/tipc/core.c2
-rw-r--r--net/tipc/core.h6
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_state.c2
-rwxr-xr-xscripts/tools-support-relr.sh8
-rw-r--r--sound/core/pcm_lib.c8
-rw-r--r--sound/pci/hda/hda_intel.c3
-rw-r--r--sound/pci/hda/patch_hdmi.c4
-rw-r--r--sound/usb/endpoint.c3
-rw-r--r--sound/usb/mixer.c4
-rw-r--r--sound/usb/quirks.c4
-rw-r--r--sound/usb/validate.c6
-rw-r--r--tools/power/cpupower/ToDo14
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c9
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c9
-rw-r--r--tools/power/cpupower/utils/helpers/cpuid.c4
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h1
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h5
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c3
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c64
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh8
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c4
-rw-r--r--tools/testing/selftests/ptp/testptp.c53
-rw-r--r--virt/kvm/kvm_main.c175
308 files changed, 5492 insertions, 5487 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 06d0931119cc..fc20cde63d1e 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -486,6 +486,8 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
+ /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 49311f3da6f2..0795e3c2643f 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -12,3 +12,5 @@ are configurable at compile, boot or run time.
spectre
l1tf
mds
+ tsx_async_abort
+   multihit
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
new file mode 100644
index 000000000000..ba9988d8bce5
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
@@ -0,0 +1,163 @@
+iTLB multihit
+=============
+
+iTLB multihit is an erratum where some processors may incur a machine check
+error, possibly resulting in an unrecoverable CPU lockup, when an
+instruction fetch hits multiple entries in the instruction TLB. This can
+occur when the page size is changed along with either the physical address
+or cache type. A malicious guest running on a virtualized system can
+exploit this erratum to perform a denial of service attack.
+
+
+Affected processors
+-------------------
+
+Variations of this erratum are present on most Intel Core and Xeon processor
+models. The erratum is not present on:
+
+ - non-Intel processors
+
+ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
+
+ - Intel processors that have the PSCHANGE_MC_NO bit set in the
+ IA32_ARCH_CAPABILITIES MSR.
+
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =================================================
+ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
+ ============== =================================================
+
+
+Problem
+-------
+
+Privileged software, including the OS and virtual machine managers (VMM), is in
+charge of memory management. A key component of memory management is control of
+the page tables. Modern processors use virtual memory, a technique that creates
+the illusion of a very large memory for software. This virtual space is split
+into pages of a given size. Page tables translate virtual addresses to physical
+addresses.
+
+To reduce latency when performing a virtual to physical address translation,
+processors include a structure, called TLB, that caches recent translations.
+There are separate TLBs for instruction (iTLB) and data (dTLB).
+
+Under this erratum, instructions are fetched from a linear address translated
+using a 4 KB translation cached in the iTLB. Privileged software modifies the
+paging structure so that the same linear address is mapped using a large page
+size (2 MB, 4 MB, 1 GB) with a different physical address or memory type. After
+the paging structure modification, but before the software invalidates any iTLB
+entries for the linear address, a code fetch on the same linear address may
+cause a machine-check error which can result in a system hang or shutdown.
+
+
+Attack scenarios
+----------------
+
+Attacks against the iTLB multihit erratum can be mounted from malicious
+guests in a virtualized system.
+
+
+iTLB multihit system information
+--------------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
+multihit status of the system: whether the system is vulnerable and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - KVM: Mitigation: Split huge pages
+ - Software changes mitigate this issue.
+ * - KVM: Vulnerable
+ - The processor is vulnerable, but no mitigation is enabled
+
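+For example, the current state can be queried directly from a shell. This is a
+minimal sketch; the exact string depends on the CPU and on the mitigation
+selected at boot::
+
+   $ cat /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+   KVM: Mitigation: Split huge pages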
+
+Enumeration of the erratum
+--------------------------------
+
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES MSR (PSCHANGE_MC_NO)
+and will be set on CPUs which are mitigated against this issue.
+
+ ======================================= =========== ================================
+ IA32_ARCH_CAPABILITIES MSR              Not present Possibly vulnerable, check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]  '0'         Likely vulnerable, check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]  '1'         Not vulnerable
+ ======================================= =========== ================================
+
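+On systems with the msr kernel module and the msr-tools package available, the
+relevant bit can also be read directly. This is a sketch only; it assumes root
+access, that MSR 0x10a (IA32_ARCH_CAPABILITIES) is implemented, and that
+PSCHANGE_MC_NO is bit 6 of that MSR::
+
+   # modprobe msr
+   # rdmsr -f 6:6 0x10a
+   1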
+
+Mitigation mechanism
+-------------------------
+
+This erratum can be mitigated by restricting the use of large page sizes to
+non-executable pages. This forces all iTLB entries to be 4K, and removes
+the possibility of multiple hits.
+
+In order to mitigate the vulnerability, KVM initially marks all huge pages
+as non-executable. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+If EPT is disabled or not available on the host, KVM is in control of TLB
+flushes and the problematic situation cannot happen. However, the shadow
+EPT paging mechanism used by nested virtualization is vulnerable, because
+the nested guest can trigger multiple iTLB hits by modifying its own
+(non-nested) page tables. For simplicity, KVM will make large pages
+non-executable in all shadow paging modes.
+
+Mitigation control on the kernel command line and KVM - module parameter
+------------------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism for marking huge pages as
+non-executable can be controlled with a module parameter "nx_huge_pages=".
+The kernel command line allows controlling the iTLB multihit mitigations at
+boot time with the option "kvm.nx_huge_pages=".
+
+The valid arguments for these options are:
+
+ ========== ================================================================
+ force Mitigation is enabled. In this case, the mitigation implements
+ non-executable huge pages in the Linux kernel KVM module. All huge
+ pages in the EPT are marked as non-executable.
+ If a guest attempts to execute in one of those pages, the page is
+ broken down into 4K pages, which are then marked executable.
+
+ off Mitigation is disabled.
+
+ auto Enable mitigation only if the platform is affected and the kernel
+ was not booted with the "mitigations=off" command line parameter.
+ This is the default option.
+ ========== ================================================================
+
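+As a usage sketch (the sysfs path below is an assumption based on how KVM module
+parameters are normally exposed, and writing it requires root)::
+
+   # cat /sys/module/kvm/parameters/nx_huge_pages
+   # echo force > /sys/module/kvm/parameters/nx_huge_pages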
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The system is protected by the kernel unconditionally and no further
+ action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ If the guest comes from a trusted source, you may assume that the guest will
+ not attempt to maliciously exploit these errata and no further action is
+ required.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ If the guest comes from an untrusted source, the host kernel will need
+ to apply the iTLB multihit mitigation via the kernel command line or the
+ KVM module parameter.
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
new file mode 100644
index 000000000000..fddbd7579c53
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -0,0 +1,276 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+======================================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+ ============== ===== ===================================================
+ CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some
+ microprocessors utilizing speculative execution may
+ allow an authenticated user to potentially enable
+ information disclosure via a side channel with
+ local access.
+ ============== ===== ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve performance of multi-threaded
+software. TSX lets the processor expose and exploit concurrency hidden in an
+application by dynamically avoiding unnecessary synchronization.
+
+TSX supports atomic memory transactions that are either committed (success) or
+aborted. During an abort, operations that happened within the transactional region
+are rolled back. An asynchronous abort takes place, among other reasons, when a
+different thread accesses a cache line that is also used within the transactional
+region and that access might lead to a data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to dependent
+operations. This can be then used to infer the value via a cache side channel
+attack.
+
+Because the buffers are potentially shared between Hyper-Threads, cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potentially leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no direct
+privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA status
+of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - 'Vulnerable'
+ - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The system tries to clear the buffers but the microcode might not support the operation.
+ * - 'Mitigation: Clear CPU buffers'
+ - The microcode has been updated to clear the buffers. TSX is still enabled.
+ * - 'Mitigation: TSX disabled'
+ - TSX is disabled.
+ * - 'Not affected'
+ - The CPU is not affected by this issue.
+
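+For example, a quick check from a shell (a minimal sketch; the reported string
+depends on the CPU, the microcode and the selected mitigation)::
+
+   $ cat /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+   Mitigation: TSX disabled
+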
+.. _ucode_needed:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the processor is vulnerable, but the availability of the microcode-based
+mitigation mechanism is not advertised via CPUID, the kernel selects a best
+effort mitigation mode. This mode invokes the mitigation instructions
+without a guarantee that they clear the CPU buffers.
+
+This is done to address virtualization scenarios where the host has the
+microcode update applied, but the hypervisor is not yet updated to expose the
+CPUID to the guest. If the host has updated microcode the protection takes
+effect; otherwise a few CPU cycles are wasted pointlessly.
+
+The state in the tsx_async_abort sysfs file reflects this situation
+accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which is
+required. If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default.
+
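+Whether the required microcode support is present can be checked, for instance,
+via the CPU feature flags (a sketch; "md_clear" is the flag name the kernel uses
+to advertise the VERW-based buffer clearing capability)::
+
+   $ grep -m1 -o 'md_clear' /proc/cpuinfo
+   md_clear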
+
+The mitigation can be controlled at boot time via a kernel command line option.
+See :ref:`taa_mitigation_control_command_line`.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has the TAA microcode and TAA is mitigated by
+having disabled TSX previously are not vulnerable regardless of the status
+of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows controlling the TAA mitigations at boot time with
+the option "tsx_async_abort=". The valid arguments for this option are:
+
+ ============ =============================================================
+ off This option disables the TAA mitigation on affected platforms.
+ If the system has TSX enabled (see next parameter) and the CPU
+ is affected, the system is vulnerable.
+
+ full TAA mitigation is enabled. If TSX is enabled, on an affected
+ system it will clear CPU buffers on ring transitions. On
+ systems which are MDS-affected and deploy MDS mitigation,
+ TAA is also mitigated. Specifying this option on those
+ systems will have no effect.
+
+ full,nosmt The same as tsx_async_abort=full, with SMT disabled on
+ vulnerable CPUs that have TSX enabled. This is the complete
+ mitigation. When TSX is disabled, SMT is not disabled because
+ the CPU is not vulnerable to cross-thread TAA attacks.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full".
+
+The kernel command line also allows controlling the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+to control the TSX feature and the enumeration of the TSX feature bits (RTM
+and HLE) in CPUID.
+
+The valid options are:
+
+ ============ =============================================================
+ off Disables TSX on the system.
+
+ Note that this option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+ and which get the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable deactivation of
+ the TSX functionality.
+
+ on Enables TSX.
+
+ Although there are mitigations for all known security
+ vulnerabilities, TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and so there may be
+ unknown security risks associated with leaving it enabled.
+
+ auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
+ on the system.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" options are
+possible. For affected platforms, tsx=auto is equivalent to tsx=off and the
+result will be:
+
+ ========= ========================== =========================================
+ tsx=on tsx_async_abort=full The system will use VERW to clear CPU
+ buffers. Cross-thread attacks are still
+ possible on SMT machines.
+ tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT
+ are mitigated.
+ tsx=on tsx_async_abort=off The system is vulnerable.
+ tsx=off tsx_async_abort=full TSX might be disabled if microcode
+ provides a TSX control MSR. If so,
+ system is not vulnerable.
+ tsx=off tsx_async_abort=full,nosmt Ditto
+ tsx=off tsx_async_abort=off Ditto
+ ========= ========================== =========================================
+
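+As a usage sketch, a host that should keep TSX available but deploy the full
+mitigation could boot with both options and verify them afterwards (illustrative
+only; the grep pattern simply picks the two options out of the command line)::
+
+   $ grep -o 'tsx[^ ]*' /proc/cmdline
+   tsx=on
+   tsx_async_abort=full,nosmt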
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
+buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
+the "tsx" command line argument has no effect.
+
+For the affected platforms, the table below indicates the mitigation status for
+the combinations of the CPUID bit MD_CLEAR and the IA32_ARCH_CAPABILITIES MSR
+bits MDS_NO and TSX_CTRL_MSR.
+
+ ======= ========= ============= ========================================
+ MDS_NO MD_CLEAR TSX_CTRL_MSR Status
+ ======= ========= ============= ========================================
+ 0 0 0 Vulnerable (needs microcode)
+ 0 1 0 MDS and TAA mitigated via VERW
+ 1 1 0 MDS fixed, TAA vulnerable if TSX enabled
+ because MD_CLEAR has no meaning and
+ VERW is not guaranteed to clear buffers
+ 1 X 1 MDS fixed, TAA can be mitigated by
+ VERW or TSX_CTRL_MSR
+ ======= ========= ============= ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+ - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a84a83f8881e..8dee8f68fe15 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2055,6 +2055,25 @@
KVM MMU at runtime.
Default is 0 (off)
+ kvm.nx_huge_pages=
+ [KVM] Controls the software workaround for the
+ X86_BUG_ITLB_MULTIHIT bug.
+ force : Always deploy workaround.
+ off : Never deploy workaround.
+ auto : Deploy workaround based on the presence of
+ X86_BUG_ITLB_MULTIHIT.
+
+ Default is 'auto'.
+
+ If the software workaround is enabled for the host,
+ guests do not need to enable it for nested guests.
+
+ kvm.nx_huge_pages_recovery_ratio=
+ [KVM] Controls how many 4KiB pages are periodically zapped
+ back to huge pages. 0 disables the recovery; otherwise, if
+ the value is N, KVM will zap 1/Nth of the 4KiB pages every
+ minute. The default is 60.
+
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled)
@@ -2636,6 +2655,13 @@
ssbd=force-off [ARM64]
l1tf=off [X86]
mds=off [X86]
+ tsx_async_abort=off [X86]
+ kvm.nx_huge_pages=off [X86]
+
+ Exceptions:
+ This does not have any effect on
+ kvm.nx_huge_pages when
+ kvm.nx_huge_pages=force.
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
@@ -2651,6 +2677,7 @@
be fully mitigated, even if it means losing SMT.
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
+ tsx_async_abort=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -4848,6 +4875,71 @@
interruptions from clocksource watchdog are not
acceptable).
+ tsx= [X86] Control Transactional Synchronization
+ Extensions (TSX) feature in Intel processors that
+ support TSX control.
+
+ This parameter controls the TSX feature. The options are:
+
+ on - Enable TSX on the system. Although there are
+ mitigations for all known security vulnerabilities,
+ TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and
+ so there may be unknown security risks associated
+ with leaving it enabled.
+
+ off - Disable TSX on the system. (Note that this
+ option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have
+ MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
+ the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable
+ deactivation of the TSX functionality.)
+
+ auto - Disable TSX if X86_BUG_TAA is present,
+ otherwise enable TSX on the system.
+
+ Not specifying this option is equivalent to tsx=off.
+
+ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+ for more details.
+
+ tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+ Abort (TAA) vulnerability.
+
+ Similar to Micro-architectural Data Sampling (MDS),
+ certain CPUs that support Transactional
+ Synchronization Extensions (TSX) are vulnerable to an
+ exploit against CPU internal buffers which can forward
+ information to a disclosure gadget under certain
+ conditions.
+
+ In vulnerable processors, the speculatively forwarded
+ data can be used in a cache side channel attack, to
+ access data to which the attacker does not have direct
+ access.
+
+ This parameter controls the TAA mitigation. The
+ options are:
+
+ full - Enable TAA mitigation on vulnerable CPUs
+ if TSX is enabled.
+
+ full,nosmt - Enable TAA mitigation and disable SMT on
+ vulnerable CPUs. If TSX is disabled, SMT
+ is not disabled because CPU is not
+ vulnerable to cross-thread TAA attacks.
+ off - Unconditionally disable TAA mitigation
+
+ Not specifying this option is equivalent to
+ tsx_async_abort=full. On CPUs which are MDS affected
+ and deploy MDS mitigation, TAA mitigation is not
+ required and doesn't provide any additional
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
Format:
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index b301f753ed2c..e77635c5422c 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -43,7 +43,7 @@ SoC Families:
- OMAP2 generic - defaults to OMAP2420
compatible = "ti,omap2"
-- OMAP3 generic - defaults to OMAP3430
+- OMAP3 generic
compatible = "ti,omap3"
- OMAP4 generic - defaults to OMAP4430
compatible = "ti,omap4"
@@ -51,6 +51,8 @@ SoC Families:
compatible = "ti,omap5"
- DRA7 generic - defaults to DRA742
compatible = "ti,dra7"
+- AM33x generic
+ compatible = "ti,am33xx"
- AM43x generic - defaults to AM4372
compatible = "ti,am43"
@@ -63,12 +65,14 @@ SoCs:
- OMAP3430
compatible = "ti,omap3430", "ti,omap3"
+ legacy: "ti,omap34xx" - please do not use any more
- AM3517
compatible = "ti,am3517", "ti,omap3"
- OMAP3630
- compatible = "ti,omap36xx", "ti,omap3"
-- AM33xx
- compatible = "ti,am33xx", "ti,omap3"
+ compatible = "ti,omap3630", "ti,omap3"
+ legacy: "ti,omap36xx" - please do not use any more
+- AM335x
+ compatible = "ti,am33xx"
- OMAP4430
compatible = "ti,omap4430", "ti,omap4"
@@ -110,19 +114,19 @@ SoCs:
- AM4372
compatible = "ti,am4372", "ti,am43"
-Boards:
+Boards (incomplete list of examples):
- OMAP3 BeagleBoard : Low cost community board
- compatible = "ti,omap3-beagle", "ti,omap3"
+ compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
- OMAP3 Tobi with Overo : Commercial expansion board with daughter board
- compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3"
+ compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3"
- OMAP4 SDP : Software Development Board
- compatible = "ti,omap4-sdp", "ti,omap4430"
+ compatible = "ti,omap4-sdp", "ti,omap4430", "ti,omap4"
- OMAP4 PandaBoard : Low cost community board
- compatible = "ti,omap4-panda", "ti,omap4430"
+ compatible = "ti,omap4-panda", "ti,omap4430", "ti,omap4"
- OMAP4 DuoVero with Parlor : Commercial expansion board with daughter board
compatible = "gumstix,omap4-duovero-parlor", "gumstix,omap4-duovero", "ti,omap4430", "ti,omap4";
@@ -134,16 +138,16 @@ Boards:
compatible = "variscite,var-dvk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
- OMAP3 EVM : Software Development Board for OMAP35x, AM/DM37x
- compatible = "ti,omap3-evm", "ti,omap3"
+ compatible = "ti,omap3-evm", "ti,omap3630", "ti,omap3"
- AM335X EVM : Software Development Board for AM335x
- compatible = "ti,am335x-evm", "ti,am33xx", "ti,omap3"
+ compatible = "ti,am335x-evm", "ti,am33xx"
- AM335X Bone : Low cost community board
- compatible = "ti,am335x-bone", "ti,am33xx", "ti,omap3"
+ compatible = "ti,am335x-bone", "ti,am33xx"
- AM3359 ICEv2 : Low cost Industrial Communication Engine EVM.
- compatible = "ti,am3359-icev2", "ti,am33xx", "ti,omap3"
+ compatible = "ti,am3359-icev2", "ti,am33xx"
- AM335X OrionLXm : Substation Automation Platform
compatible = "novatech,am335x-lxm", "ti,am33xx"
diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
index 0c38e4b8fc51..1758051798fe 100644
--- a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
+++ b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
@@ -15,12 +15,16 @@ In 'cpus' nodes:
In 'operating-points-v2' table:
- compatible: Should be
- - 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx SoCs
+ - 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx,
+ omap34xx, omap36xx and am3517 SoCs
- syscon: A phandle pointing to a syscon node representing the control module
register space of the SoC.
Optional properties:
--------------------
+- "vdd-supply", "vbb-supply": to define two regulators for dra7xx
+- "cpu0-supply", "vbb-supply": to define two regulators for omap36xx
+
For each opp entry in 'operating-points-v2' table:
- opp-supported-hw: Two bitfields indicating:
1. Which revision of the SoC the OPP is supported by
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
index 3e36c1d11386..fb46b491791c 100644
--- a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
@@ -10,14 +10,23 @@ The Exynos PPMU driver uses the devfreq-event class to provide event data
to various devfreq devices. The devfreq devices would use the event data when
derterming the current state of each IP.
-Required properties:
+Required properties for PPMU device:
- compatible: Should be "samsung,exynos-ppmu" or "samsung,exynos-ppmu-v2.
- reg: physical base address of each PPMU and length of memory mapped region.
-Optional properties:
+Optional properties for PPMU device:
- clock-names : the name of clock used by the PPMU, "ppmu"
- clocks : phandles for clock specified in "clock-names" property
+Required properties for 'events' child node of PPMU device:
+- event-name : the unique event name among PPMU device
+Optional properties for 'events' child node of PPMU device:
+- event-data-type : Define the type of data which shall be counted
+by the counter. You can check include/dt-bindings/pmu/exynos_ppmu.h for
+all possible types, e.g. count read requests, count write data in bytes,
+etc. This field is optional and when it is missing, the driver code
+will use a default data type.
+
Example1 : PPMUv1 nodes in exynos3250.dtsi are listed below.
ppmu_dmc0: ppmu_dmc0@106a0000 {
@@ -145,3 +154,16 @@ Example3 : PPMUv2 nodes in exynos5433.dtsi are listed below.
reg = <0x104d0000 0x2000>;
status = "disabled";
};
+
+Example4 : 'event-data-type' in exynos4412-ppmu-common.dtsi are listed below.
+
+ &ppmu_dmc0 {
+ status = "okay";
+ events {
+ ppmu_dmc0_3: ppmu-event3-dmc0 {
+ event-name = "ppmu-event3-dmc0";
+ event-data-type = <(PPMU_RO_DATA_CNT |
+ PPMU_WO_DATA_CNT)>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
index f8e946471a58..e71f752cc18f 100644
--- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -50,8 +50,6 @@ Required properties only for passive bus device:
Optional properties only for parent bus device:
- exynos,saturation-ratio: the percentage value which is used to calibrate
the performance count against total cycle count.
-- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
- which is used to calculate the max voltage.
Detailed correlation between sub-blocks and power line according to Exynos SoC:
- In case of Exynos3250, there are two power line as following:
diff --git a/Documentation/power/drivers-testing.rst b/Documentation/power/drivers-testing.rst
index e53f1999fc39..d77d2894f9fe 100644
--- a/Documentation/power/drivers-testing.rst
+++ b/Documentation/power/drivers-testing.rst
@@ -39,9 +39,10 @@ c) Compile the driver directly into the kernel and try the test modes of
d) Attempt to hibernate with the driver compiled directly into the kernel
in the "reboot", "shutdown" and "platform" modes.
-e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.rst,
- 2). [As far as the STR tests are concerned, it should not matter whether or
- not the driver is built as a module.]
+e) Try the test modes of suspend (see:
+ Documentation/power/basic-pm-debugging.rst, 2). [As far as the STR tests are
+ concerned, it should not matter whether or not the driver is built as a
+ module.]
f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
(see: Documentation/power/basic-pm-debugging.rst, 2).
diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst
index ef110fe55e82..8bd693399834 100644
--- a/Documentation/power/freezing-of-tasks.rst
+++ b/Documentation/power/freezing-of-tasks.rst
@@ -215,30 +215,31 @@ VI. Are there any precautions to be taken to prevent freezing failures?
Yes, there are.
-First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
-from system-wide sleep such as suspend/hibernation is not encouraged.
-If possible, that piece of code must instead hook onto the suspend/hibernation
-notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
-(kernel/cpu.c) for an example.
-
-However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
-that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
-to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
-state. As a consequence, the freezer would not be able to freeze that task,
-leading to freezing failure.
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a
+piece of code from system-wide sleep such as suspend/hibernation is not
+encouraged. If possible, that piece of code must instead hook onto the
+suspend/hibernation notifiers to achieve mutual exclusion. Look at the
+CPU-Hotplug code (kernel/cpu.c) for an example.
+
+However, if that is not feasible, and grabbing 'system_transition_mutex' is
+deemed necessary, it is strongly discouraged to directly call
+mutex_[un]lock(&system_transition_mutex) since that could lead to freezing
+failures, because if the suspend/hibernate code successfully acquired the
+'system_transition_mutex' lock, and hence that other entity failed to acquire
+the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE state. As a
+consequence, the freezer would not be able to freeze that task, leading to
+freezing failure.
However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
-only after the entire suspend/hibernation sequence is complete.
-So, to summarize, use [un]lock_system_sleep() instead of directly using
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be
+released only after the entire suspend/hibernation sequence is complete. So, to
+summarize, use [un]lock_system_sleep() instead of directly using
mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
V. Miscellaneous
================
/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
-all user space processes or all freezable kernel threads, in unit of millisecond.
-The default value is 20000, with range of unsigned integer.
+all user space processes or all freezable kernel threads, in unit of
+millisecond. The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst
index 209c7613f5a4..e3cc4f349ea8 100644
--- a/Documentation/power/opp.rst
+++ b/Documentation/power/opp.rst
@@ -73,19 +73,21 @@ factors. Example usage: Thermal management or other exceptional situations where
SoC framework might choose to disable a higher frequency OPP to safely continue
operations until that OPP could be re-enabled if possible.
-OPP library facilitates this concept in it's implementation. The following
+OPP library facilitates this concept in its implementation. The following
operational functions operate only on available opps:
-opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
+opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq,
+dev_pm_opp_get_opp_count
-dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
-be used for dev_pm_opp_enable/disable functions to make an opp available as required.
+dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer
+which can then be used for dev_pm_opp_enable/disable functions to make an
+opp available as required.
WARNING: Users of OPP library should refresh their availability count using
-get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
-exact mechanism to trigger these or the notification mechanism to other
-dependent subsystems such as cpufreq are left to the discretion of the SoC
-specific framework which uses the OPP library. Similar care needs to be taken
-care to refresh the cpufreq table in cases of these operations.
+get_opp_count if dev_pm_opp_enable/disable functions are invoked for a
+device, the exact mechanism to trigger these or the notification mechanism
+to other dependent subsystems such as cpufreq are left to the discretion of
+the SoC specific framework which uses the OPP library. Similar care needs
+to be taken to refresh the cpufreq table in case of these operations.
2. Initial OPP List Registration
================================
@@ -99,11 +101,11 @@ OPPs dynamically using the dev_pm_opp_enable / disable functions.
dev_pm_opp_add
Add a new OPP for a specific domain represented by the device pointer.
The OPP is defined using the frequency and voltage. Once added, the OPP
- is assumed to be available and control of it's availability can be done
- with the dev_pm_opp_enable/disable functions. OPP library internally stores
- and manages this information in the opp struct. This function may be
- used by SoC framework to define a optimal list as per the demands of
- SoC usage environment.
+ is assumed to be available and control of its availability can be done
+ with the dev_pm_opp_enable/disable functions. OPP library
+ internally stores and manages this information in the opp struct.
+ This function may be used by the SoC framework to define an optimal list
+ as per the demands of SoC usage environment.
WARNING:
Do not use this function in interrupt context.
@@ -354,7 +356,7 @@ struct dev_pm_opp
struct device
This is used to identify a domain to the OPP layer. The
- nature of the device and it's implementation is left to the user of
+ nature of the device and its implementation is left to the user of
OPP library such as the SoC framework.
Overall, in a simplistic view, the data structure operations is represented as
diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
index 0e2ef7429304..51e0a493d284 100644
--- a/Documentation/power/pci.rst
+++ b/Documentation/power/pci.rst
@@ -426,12 +426,12 @@ pm->runtime_idle() callback.
2.4. System-Wide Power Transitions
----------------------------------
There are a few different types of system-wide power transitions, described in
-Documentation/driver-api/pm/devices.rst. Each of them requires devices to be handled
-in a specific way and the PM core executes subsystem-level power management
-callbacks for this purpose. They are executed in phases such that each phase
-involves executing the same subsystem-level callback for every device belonging
-to the given subsystem before the next phase begins. These phases always run
-after tasks have been frozen.
+Documentation/driver-api/pm/devices.rst. Each of them requires devices to be
+handled in a specific way and the PM core executes subsystem-level power
+management callbacks for this purpose. They are executed in phases such that
+each phase involves executing the same subsystem-level callback for every device
+belonging to the given subsystem before the next phase begins. These phases
+always run after tasks have been frozen.
2.4.1. System Suspend
^^^^^^^^^^^^^^^^^^^^^
@@ -636,12 +636,12 @@ System restore requires a hibernation image to be loaded into memory and the
pre-hibernation memory contents to be restored before the pre-hibernation system
activity can be resumed.
-As described in Documentation/driver-api/pm/devices.rst, the hibernation image is loaded
-into memory by a fresh instance of the kernel, called the boot kernel, which in
-turn is loaded and run by a boot loader in the usual way. After the boot kernel
-has loaded the image, it needs to replace its own code and data with the code
-and data of the "hibernated" kernel stored within the image, called the image
-kernel. For this purpose all devices are frozen just like before creating
+As described in Documentation/driver-api/pm/devices.rst, the hibernation image
+is loaded into memory by a fresh instance of the kernel, called the boot kernel,
+which in turn is loaded and run by a boot loader in the usual way. After the
+boot kernel has loaded the image, it needs to replace its own code and data with
+the code and data of the "hibernated" kernel stored within the image, called the
+image kernel. For this purpose all devices are frozen just like before creating
the image during hibernation, in the
prepare, freeze, freeze_noirq
@@ -691,8 +691,8 @@ controlling the runtime power management of their devices.
At the time of this writing there are two ways to define power management
callbacks for a PCI device driver, the recommended one, based on using a
-dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and the
-"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
+dev_pm_ops structure described in Documentation/driver-api/pm/devices.rst, and
+the "legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
.resume() callbacks from struct pci_driver are used. The legacy approach,
however, doesn't allow one to define runtime power management callbacks and is
not really suitable for any new drivers. Therefore it is not covered by this
diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst
index 3097694fba69..0d62d506caf0 100644
--- a/Documentation/power/pm_qos_interface.rst
+++ b/Documentation/power/pm_qos_interface.rst
@@ -8,8 +8,8 @@ one of the parameters.
Two different PM QoS frameworks are available:
1. PM QoS classes for cpu_dma_latency
-2. the per-device PM QoS framework provides the API to manage the per-device latency
-constraints and PM QoS flags.
+2. The per-device PM QoS framework provides the API to manage the
+ per-device latency constraints and PM QoS flags.
Each parameters have defined units:
@@ -47,14 +47,14 @@ void pm_qos_add_request(handle, param_class, target_value):
pm_qos API functions.
void pm_qos_update_request(handle, new_target_value):
- Will update the list element pointed to by the handle with the new target value
- and recompute the new aggregated target, calling the notification tree if the
- target is changed.
+ Will update the list element pointed to by the handle with the new target
+ value and recompute the new aggregated target, calling the notification tree
+ if the target is changed.
void pm_qos_remove_request(handle):
- Will remove the element. After removal it will update the aggregate target and
- call the notification tree if the target was changed as a result of removing
- the request.
+ Will remove the element. After removal it will update the aggregate target
+ and call the notification tree if the target was changed as a result of
+ removing the request.
int pm_qos_request(param_class):
Returns the aggregated value for a given PM QoS class.
@@ -167,9 +167,9 @@ int dev_pm_qos_expose_flags(device, value)
change the value of the PM_QOS_FLAG_NO_POWER_OFF flag.
void dev_pm_qos_hide_flags(device)
- Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
- of flags and remove sysfs attribute pm_qos_no_power_off from the device's power
- directory.
+ Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS
+ list of flags and remove sysfs attribute pm_qos_no_power_off from the device's
+ power directory.
Notification mechanisms:
@@ -179,8 +179,8 @@ int dev_pm_qos_add_notifier(device, notifier, type):
Adds a notification callback function for the device for a particular request
type.
- The callback is called when the aggregated value of the device constraints list
- is changed.
+ The callback is called when the aggregated value of the device constraints
+ list is changed.
int dev_pm_qos_remove_notifier(device, notifier, type):
Removes the notification callback function for the device.
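
For readers who want to see the notification hooks in context, here is a minimal sketch (assuming the three-argument dev_pm_qos_add_notifier() shown above and the DEV_PM_QOS_RESUME_LATENCY request type; everything named my_* is hypothetical):

  #include <linux/device.h>
  #include <linux/kernel.h>
  #include <linux/notifier.h>
  #include <linux/pm_qos.h>

  static int my_latency_notify(struct notifier_block *nb,
                               unsigned long value, void *data)
  {
          /* 'value' is the new aggregated constraint for the device. */
          pr_info("resume latency constraint is now %lu us\n", value);
          return NOTIFY_OK;
  }

  static struct notifier_block my_latency_nb = {
          .notifier_call = my_latency_notify,
  };

  static int my_register_latency_notifier(struct device *dev)
  {
          /* Typically called from probe(). */
          return dev_pm_qos_add_notifier(dev, &my_latency_nb,
                                         DEV_PM_QOS_RESUME_LATENCY);
  }

  static void my_unregister_latency_notifier(struct device *dev)
  {
          /* Typically called from remove(). */
          dev_pm_qos_remove_notifier(dev, &my_latency_nb,
                                     DEV_PM_QOS_RESUME_LATENCY);
  }
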
diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index 2c2ec99b5088..ab8406c84254 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -268,8 +268,8 @@ defined in include/linux/pm.h:
`unsigned int runtime_auto;`
- if set, indicates that the user space has allowed the device driver to
power manage the device at run time via the /sys/devices/.../power/control
- `interface;` it may only be modified with the help of the pm_runtime_allow()
- and pm_runtime_forbid() helper functions
+ `interface;` it may only be modified with the help of the
+ pm_runtime_allow() and pm_runtime_forbid() helper functions
`unsigned int no_callbacks;`
- indicates that the device does not use the runtime PM callbacks (see
diff --git a/Documentation/power/suspend-and-cpuhotplug.rst b/Documentation/power/suspend-and-cpuhotplug.rst
index 7ac8e1f549f4..572d968c5375 100644
--- a/Documentation/power/suspend-and-cpuhotplug.rst
+++ b/Documentation/power/suspend-and-cpuhotplug.rst
@@ -106,8 +106,8 @@ execution during resume):
* Release system_transition_mutex lock.
-It is to be noted here that the system_transition_mutex lock is acquired at the very
-beginning, when we are just starting out to suspend, and then released only
+It is to be noted here that the system_transition_mutex lock is acquired at the
+very beginning, when we are just starting out to suspend, and then released only
after the entire cycle is complete (i.e., suspend + resume).
::
@@ -165,7 +165,8 @@ Important files and functions/entry points:
- kernel/power/process.c : freeze_processes(), thaw_processes()
- kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
-- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
+- kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](),
+ [disable|enable]_nonboot_cpus()
diff --git a/Documentation/power/swsusp.rst b/Documentation/power/swsusp.rst
index d000312f6965..8524f079e05c 100644
--- a/Documentation/power/swsusp.rst
+++ b/Documentation/power/swsusp.rst
@@ -118,7 +118,8 @@ In a really perfect world::
echo 1 > /proc/acpi/sleep # for standby
echo 2 > /proc/acpi/sleep # for suspend to ram
- echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative
+ echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power
+ # conservative
echo 4 > /proc/acpi/sleep # for suspend to disk
echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system
@@ -192,8 +193,8 @@ Q:
A:
The freezing of tasks is a mechanism by which user space processes and some
- kernel threads are controlled during hibernation or system-wide suspend (on some
- architectures). See freezing-of-tasks.txt for details.
+ kernel threads are controlled during hibernation or system-wide suspend (on
+ some architectures). See freezing-of-tasks.txt for details.
Q:
What is the difference between "platform" and "shutdown"?
@@ -282,7 +283,8 @@ A:
suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
with state snapshot
- state snapshot: copy of whole used memory is taken with interrupts disabled
+ state snapshot: copy of whole used memory is taken with interrupts
+ disabled
resume(): devices are woken up so that we can write image to swap
@@ -353,8 +355,8 @@ Q:
A:
Generally, yes, you can. However, it requires you to use the "resume=" and
- "resume_offset=" kernel command line parameters, so the resume from a swap file
- cannot be initiated from an initrd or initramfs image. See
+ "resume_offset=" kernel command line parameters, so the resume from a swap
+ file cannot be initiated from an initrd or initramfs image. See
swsusp-and-swap-files.txt for details.
Q:
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index af64c4bb4447..a8de2fbc1caa 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -27,6 +27,7 @@ x86-specific Documentation
mds
microcode
resctrl_ui
+ tsx_async_abort
usb-legacy-support
i386/index
x86_64/index
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst
new file mode 100644
index 000000000000..583ddc185ba2
--- /dev/null
+++ b/Documentation/x86/tsx_async_abort.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors, similar to Microarchitectural Data Sampling (MDS). In this
+case certain loads may speculatively pass invalid data to dependent operations
+when an asynchronous abort condition is pending in a Transactional
+Synchronization Extensions (TSX) transaction. This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale data from
+the same uarch data structures as in MDS, with the same scope of exposure, i.e.
+same-thread and cross-thread. This issue affects all current processors that
+support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR,
+IA32_TSX_CTRL, is available on future processors and on current processors
+after a microcode update, and can be used to disable TSX. In addition, it
+controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
+vulnerability. More details on this approach can be found in
+:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ============= ============================================================
+ off Mitigation is disabled. Either the CPU is not affected or
+ tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled Mitigation is enabled. TSX feature is disabled by default at
+ bootup on processors that support TSX control.
+
+ verw Mitigation is enabled. CPU is affected and MD_CLEAR is
+ advertised in CPUID.
+
+ ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not
+ advertised in CPUID. That is mainly for virtualization
+ scenarios where the host has the updated microcode but the
+ hypervisor does not expose MD_CLEAR in CPUID. It's a best
+ effort approach without a guarantee.
+ ============= ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line parameter
+is not provided, the kernel selects an appropriate mitigation depending on the
+status of the RTM and MD_CLEAR CPUID bits.
+
+The tables below show the impact of the tsx=on|off|auto cmdline options on the
+state of the TAA mitigation, VERW behavior and the TSX feature for various
+combinations of MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Disabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+2. "tsx=on"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Enabled Yes None Same as MDS
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+3. "tsx=auto"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+ Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
+ sub-feature of TSX (forcing all transactions to abort on the
+ XBEGIN instruction).
+
+ Bit 1: When set, it disables the enumeration of the RTM and HLE features
+ (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+ CPUID(EAX=7).EBX{bit11} read as 0).
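
As an illustrative sketch only (not part of this patch), the two bits can be set from kernel context with the standard MSR accessors and the TSX_CTRL_* constants this series adds to msr-index.h; the helper name below is made up:

  #include <linux/types.h>
  #include <asm/msr.h>

  static void example_disable_tsx(void)          /* hypothetical helper */
  {
          u64 tsx;

          rdmsrl(MSR_IA32_TSX_CTRL, tsx);
          tsx |= TSX_CTRL_RTM_DISABLE;    /* bit 0: abort all RTM transactions */
          tsx |= TSX_CTRL_CPUID_CLEAR;    /* bit 1: hide RTM/HLE in CPUID */
          wrmsrl(MSR_IA32_TSX_CTRL, tsx);
  }
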
diff --git a/MAINTAINERS b/MAINTAINERS
index eb19fad370d7..e54f7d4c9457 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3261,7 +3261,6 @@ S: Maintained
F: drivers/cpufreq/bmips-cpufreq.c
BROADCOM BMIPS MIPS ARCHITECTURE
-M: Kevin Cernekee <cernekee@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com
L: linux-mips@vger.kernel.org
@@ -3533,7 +3532,7 @@ BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
M: Chanwoo Choi <cw00.choi@samsung.com>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
S: Maintained
F: drivers/devfreq/exynos-bus.c
F: Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -4270,14 +4269,13 @@ F: include/linux/cpufreq.h
F: include/linux/sched/cpufreq.h
F: tools/testing/selftests/cpufreq/
-CPU FREQUENCY DRIVERS - ARM BIG LITTLE
+CPU FREQUENCY DRIVERS - VEXPRESS SPC ARM BIG LITTLE
M: Viresh Kumar <viresh.kumar@linaro.org>
M: Sudeep Holla <sudeep.holla@arm.com>
L: linux-pm@vger.kernel.org
W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php
S: Maintained
-F: drivers/cpufreq/arm_big_little.h
-F: drivers/cpufreq/arm_big_little.c
+F: drivers/cpufreq/vexpress-spc-cpufreq.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
@@ -4762,9 +4760,9 @@ F: include/linux/devcoredump.h
DEVICE FREQUENCY (DEVFREQ)
M: MyungJoo Ham <myungjoo.ham@samsung.com>
M: Kyungmin Park <kyungmin.park@samsung.com>
-R: Chanwoo Choi <cw00.choi@samsung.com>
+M: Chanwoo Choi <cw00.choi@samsung.com>
L: linux-pm@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
S: Maintained
F: drivers/devfreq/
F: include/linux/devfreq.h
@@ -4774,10 +4772,11 @@ F: include/trace/events/devfreq.h
DEVICE FREQUENCY EVENT (DEVFREQ-EVENT)
M: Chanwoo Choi <cw00.choi@samsung.com>
L: linux-pm@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
S: Supported
F: drivers/devfreq/event/
F: drivers/devfreq/devfreq-event.c
+F: include/dt-bindings/pmu/exynos_ppmu.h
F: include/linux/devfreq-event.h
F: Documentation/devicetree/bindings/devfreq/event/
@@ -8299,11 +8298,14 @@ F: drivers/hid/intel-ish-hid/
INTEL IOMMU (VT-d)
M: David Woodhouse <dwmw2@infradead.org>
+M: Lu Baolu <baolu.lu@linux.intel.com>
L: iommu@lists.linux-foundation.org
-T: git git://git.infradead.org/iommu-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
S: Supported
-F: drivers/iommu/intel-iommu.c
+F: drivers/iommu/dmar.c
+F: drivers/iommu/intel*.[ch]
F: include/linux/intel-iommu.h
+F: include/linux/intel-svm.h
INTEL IOP-ADMA DMA DRIVER
R: Dan Williams <dan.j.williams@intel.com>
@@ -12998,6 +13000,15 @@ L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/pm8001/
+PM-GRAPH UTILITY
+M: "Todd E Brandt" <todd.e.brandt@linux.intel.com>
+L: linux-pm@vger.kernel.org
+W: https://01.org/pm-graph
+B: https://bugzilla.kernel.org/buglist.cgi?component=pm-graph&product=Tools
+T: git git://github.com/intel/pm-graph
+S: Supported
+F: tools/power/pm-graph
+
PNP SUPPORT
M: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
S: Maintained
@@ -17338,12 +17349,6 @@ F: include/linux/vbox_utils.h
F: include/uapi/linux/vbox*.h
F: drivers/virt/vboxguest/
-VIRTUAL BOX SHARED FOLDER VFS DRIVER:
-M: Hans de Goede <hdegoede@redhat.com>
-L: linux-fsdevel@vger.kernel.org
-S: Maintained
-F: drivers/staging/vboxsf/*
-
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
S: Maintained
diff --git a/Makefile b/Makefile
index 1d5298356ea8..9cd289196267 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -917,6 +917,9 @@ ifeq ($(CONFIG_RELR),y)
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
endif
+# make the checker run with the right architecture
+CHECKFLAGS += --arch=$(ARCH)
+
# insure the checker run with the right endianness
CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index bf3002009b00..76f819f4ba48 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -16,6 +16,37 @@
can = &hecc;
};
+ cpus {
+ cpu: cpu@0 {
+ /* Based on OMAP3630 variants OPP50 and OPP100 */
+ operating-points-v2 = <&cpu0_opp_table>;
+
+ clock-latency = <300000>; /* From legacy driver */
+ };
+ };
+
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+ /*
+ * AM3517 TRM only lists 600MHz @ 1.2V, but omap36xx
+ * appear to operate at 300MHz as well. Since AM3517 only
+ * lists one operating voltage, it will remain fixed at 1.2V
+ */
+ opp50-300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ opp-microvolt = <1200000>;
+ opp-supported-hw = <0xffffffff 0xffffffff>;
+ opp-suspend;
+ };
+
+ opp100-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1200000>;
+ opp-supported-hw = <0xffffffff 0xffffffff>;
+ };
+ };
+
ocp@68000000 {
am35x_otg_hs: am35x_otg_hs@5c040000 {
compatible = "ti,omap3-musb";
diff --git a/arch/arm/boot/dts/am3517_mt_ventoux.dts b/arch/arm/boot/dts/am3517_mt_ventoux.dts
index e507e4ae0d88..e7d7124a34ba 100644
--- a/arch/arm/boot/dts/am3517_mt_ventoux.dts
+++ b/arch/arm/boot/dts/am3517_mt_ventoux.dts
@@ -8,7 +8,7 @@
/ {
model = "TeeJet Mt.Ventoux";
- compatible = "teejet,mt_ventoux", "ti,omap3";
+ compatible = "teejet,mt_ventoux", "ti,am3517", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
index f7a841a28865..2a0a98fe67f0 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
@@ -9,5 +9,5 @@
/ {
model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
- compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3";
+ compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
index 7675bc3fa868..57bae2aa910e 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
@@ -9,5 +9,5 @@
/ {
model = "LogicPD Zoom OMAP35xx Torpedo Development Kit";
- compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3";
+ compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3430", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 1aa99fc1487a..125ed933ca75 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 BeagleBoard xM";
- compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-beagle-xm", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index e3df3c166902..4ed3f93f5841 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 BeagleBoard";
- compatible = "ti,omap3-beagle", "ti,omap3";
+ compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-cm-t3530.dts b/arch/arm/boot/dts/omap3-cm-t3530.dts
index 76e52c78cbb4..32dbaeaed147 100644
--- a/arch/arm/boot/dts/omap3-cm-t3530.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3530.dts
@@ -9,7 +9,7 @@
/ {
model = "CompuLab CM-T3530";
- compatible = "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+ compatible = "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
/* Regulator to trigger the reset signal of the Wifi module */
mmc2_sdio_reset: regulator-mmc2-sdio-reset {
diff --git a/arch/arm/boot/dts/omap3-cm-t3730.dts b/arch/arm/boot/dts/omap3-cm-t3730.dts
index 6e944dfa0f3d..683819bf0915 100644
--- a/arch/arm/boot/dts/omap3-cm-t3730.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3730.dts
@@ -9,7 +9,7 @@
/ {
model = "CompuLab CM-T3730";
- compatible = "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+ compatible = "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
wl12xx_vmmc2: wl12xx_vmmc2 {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
index a80fc60bc773..afed85078ad8 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
@@ -11,7 +11,7 @@
#include "omap3-devkit8000-lcd-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000 with 4.3'' LCD panel";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
lcd0: display {
panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
index 0753776071f8..07c51a105c0d 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
@@ -11,7 +11,7 @@
#include "omap3-devkit8000-lcd-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000 with 7.0'' LCD panel";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
lcd0: display {
panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index faafc48d8f61..162d0726b008 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -7,7 +7,7 @@
#include "omap3-devkit8000-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
aliases {
display1 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index b6ef1a7ac8a4..409a758c99f1 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -11,7 +11,7 @@
/ {
model = "OMAP3 GTA04";
- compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-gta04", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-ha-lcd.dts b/arch/arm/boot/dts/omap3-ha-lcd.dts
index badb9b3c8897..c9ecbc45c8e2 100644
--- a/arch/arm/boot/dts/omap3-ha-lcd.dts
+++ b/arch/arm/boot/dts/omap3-ha-lcd.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 HEAD acoustics LCD-baseboard with TAO3530 SOM";
- compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-ha.dts b/arch/arm/boot/dts/omap3-ha.dts
index a5365252bfbe..35c4e15abeb7 100644
--- a/arch/arm/boot/dts/omap3-ha.dts
+++ b/arch/arm/boot/dts/omap3-ha.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 HEAD acoustics baseboard with TAO3530 SOM";
- compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
index 03dcd05fb8a0..d134ce1cffc0 100644
--- a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
+++ b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEPv2 Rev. F (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0020-rev-f", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0020-rev-f", "ti,omap3630", "ti,omap36xx", "ti,omap3";
/* Regulator to trigger the WL_EN signal of the Wifi module */
lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts
index 6d0519e3dfd0..e341535a7162 100644
--- a/arch/arm/boot/dts/omap3-igep0020.dts
+++ b/arch/arm/boot/dts/omap3-igep0020.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEPv2 Rev. C (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0020", "ti,omap3630", "ti,omap36xx", "ti,omap3";
vmmcsdio_fixed: fixedregulator-mmcsdio {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
index 060acd1e803a..9ca1d0f61964 100644
--- a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
+++ b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEP COM MODULE Rev. G (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0030-rev-g", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0030-rev-g", "ti,omap3630", "ti,omap36xx", "ti,omap3";
/* Regulator to trigger the WL_EN signal of the Wifi module */
lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts
index 25170bd3c573..32f31035daa2 100644
--- a/arch/arm/boot/dts/omap3-igep0030.dts
+++ b/arch/arm/boot/dts/omap3-igep0030.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEP COM MODULE Rev. E (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0030", "ti,omap3630", "ti,omap36xx", "ti,omap3";
vmmcsdio_fixed: fixedregulator-mmcsdio {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-ldp.dts b/arch/arm/boot/dts/omap3-ldp.dts
index 9a5fde2d9bce..ec9ba04ef43b 100644
--- a/arch/arm/boot/dts/omap3-ldp.dts
+++ b/arch/arm/boot/dts/omap3-ldp.dts
@@ -10,7 +10,7 @@
/ {
model = "TI OMAP3430 LDP (Zoom1 Labrador)";
- compatible = "ti,omap3-ldp", "ti,omap3";
+ compatible = "ti,omap3-ldp", "ti,omap3430", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index c22833d4e568..73d477898ec2 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -7,7 +7,7 @@
/ {
model = "INCOstartec LILLY-A83X module (DM3730)";
- compatible = "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+ compatible = "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
chosen {
bootargs = "console=ttyO0,115200n8 vt.global_cursor_default=0 consoleblank=0";
diff --git a/arch/arm/boot/dts/omap3-lilly-dbb056.dts b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
index fec335400074..ecb4ef738e07 100644
--- a/arch/arm/boot/dts/omap3-lilly-dbb056.dts
+++ b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
@@ -8,7 +8,7 @@
/ {
model = "INCOstartec LILLY-DBB056 (DM3730)";
- compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+ compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&twl {
diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
index 74c0ff2350d3..2495a696cec6 100644
--- a/arch/arm/boot/dts/omap3-n9.dts
+++ b/arch/arm/boot/dts/omap3-n9.dts
@@ -12,7 +12,7 @@
/ {
model = "Nokia N9";
- compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
+ compatible = "nokia,omap3-n9", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&i2c2 {
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index 6681d4519e97..a075b63f3087 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -11,13 +11,6 @@
cpus {
cpu@0 {
cpu0-supply = <&vcc>;
- operating-points = <
- /* kHz uV */
- 300000 1012500
- 600000 1200000
- 800000 1325000
- 1000000 1375000
- >;
};
};
diff --git a/arch/arm/boot/dts/omap3-n950.dts b/arch/arm/boot/dts/omap3-n950.dts
index 9886bf8b90ab..31d47a1fad84 100644
--- a/arch/arm/boot/dts/omap3-n950.dts
+++ b/arch/arm/boot/dts/omap3-n950.dts
@@ -12,7 +12,7 @@
/ {
model = "Nokia N950";
- compatible = "nokia,omap3-n950", "ti,omap36xx", "ti,omap3";
+ compatible = "nokia,omap3-n950", "ti,omap3630", "ti,omap36xx", "ti,omap3";
keys {
compatible = "gpio-keys";
diff --git a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
index 18338576c41d..7f04dfad8203 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
@@ -14,5 +14,5 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Alto35";
- compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
index f204c8af8281..bc5a04e03336 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Chestnut43";
- compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
index c633f7cee68e..065c31cbf0e2 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Gallop43";
- compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
index fb88ebc9858c..e38c1c51392c 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo35";
- compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
index 76cca00d97b6..e6dc23159c4d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo43";
- compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-summit.dts b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
index cc081a9e4c1e..587c08ce282d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-summit.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Summit";
- compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
index 1de41c0826e0..f57de6010994 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
@@ -14,6 +14,6 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Tobi";
- compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
index 9ed13118ed8e..281af6c113be 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
@@ -14,5 +14,5 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on TobiDuo";
- compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-pandora-1ghz.dts b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
index 81b957f33c9f..ea509956d7ac 100644
--- a/arch/arm/boot/dts/omap3-pandora-1ghz.dts
+++ b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
@@ -16,7 +16,7 @@
/ {
model = "Pandora Handheld Console 1GHz";
- compatible = "openpandora,omap3-pandora-1ghz", "ti,omap36xx", "ti,omap3";
+ compatible = "openpandora,omap3-pandora-1ghz", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-sbc-t3530.dts b/arch/arm/boot/dts/omap3-sbc-t3530.dts
index ae96002abb3b..24bf3fd86641 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3530.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3530.dts
@@ -8,7 +8,7 @@
/ {
model = "CompuLab SBC-T3530 with CM-T3530";
- compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+ compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
aliases {
display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sbc-t3730.dts b/arch/arm/boot/dts/omap3-sbc-t3730.dts
index 7de6df16fc17..eb3893b9535e 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3730.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3730.dts
@@ -8,7 +8,7 @@
/ {
model = "CompuLab SBC-T3730 with CM-T3730";
- compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+ compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
aliases {
display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sniper.dts b/arch/arm/boot/dts/omap3-sniper.dts
index 40a87330e8c3..b6879cdc5c13 100644
--- a/arch/arm/boot/dts/omap3-sniper.dts
+++ b/arch/arm/boot/dts/omap3-sniper.dts
@@ -9,7 +9,7 @@
/ {
model = "LG Optimus Black";
- compatible = "lg,omap3-sniper", "ti,omap36xx", "ti,omap3";
+ compatible = "lg,omap3-sniper", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-thunder.dts b/arch/arm/boot/dts/omap3-thunder.dts
index 6276e7079b36..64221e3b3477 100644
--- a/arch/arm/boot/dts/omap3-thunder.dts
+++ b/arch/arm/boot/dts/omap3-thunder.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 Thunder baseboard with TAO3530 SOM";
- compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts
index db3a2fe84e99..d240e39f2151 100644
--- a/arch/arm/boot/dts/omap3-zoom3.dts
+++ b/arch/arm/boot/dts/omap3-zoom3.dts
@@ -9,7 +9,7 @@
/ {
model = "TI Zoom3";
- compatible = "ti,omap3-zoom3", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-zoom3", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts
index 0abd61108a53..7bfde8aac7ae 100644
--- a/arch/arm/boot/dts/omap3430-sdp.dts
+++ b/arch/arm/boot/dts/omap3430-sdp.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3430 SDP";
- compatible = "ti,omap3430-sdp", "ti,omap3";
+ compatible = "ti,omap3430-sdp", "ti,omap3430", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi
index 7b09cbee8bb8..c4dd9801840d 100644
--- a/arch/arm/boot/dts/omap34xx.dtsi
+++ b/arch/arm/boot/dts/omap34xx.dtsi
@@ -16,19 +16,67 @@
/ {
cpus {
cpu: cpu@0 {
- /* OMAP343x/OMAP35xx variants OPP1-5 */
- operating-points = <
- /* kHz uV */
- 125000 975000
- 250000 1075000
- 500000 1200000
- 550000 1270000
- 600000 1350000
- >;
+ /* OMAP343x/OMAP35xx variants OPP1-6 */
+ operating-points-v2 = <&cpu0_opp_table>;
+
clock-latency = <300000>; /* From legacy driver */
};
};
+ /* see Documentation/devicetree/bindings/opp/opp.txt */
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+
+ opp1-125000000 {
+ opp-hz = /bits/ 64 <125000000>;
+ /*
+ * we currently only select the max voltage from table
+ * Table 3-3 of the omap3530 Data sheet (SPRS507F).
+ * Format is: <target min max>
+ */
+ opp-microvolt = <975000 975000 975000>;
+ /*
+ * first value is silicon revision bit mask
+ * second one 720MHz Device Identification bit mask
+ */
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp2-250000000 {
+ opp-hz = /bits/ 64 <250000000>;
+ opp-microvolt = <1075000 1075000 1075000>;
+ opp-supported-hw = <0xffffffff 3>;
+ opp-suspend;
+ };
+
+ opp3-500000000 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <1200000 1200000 1200000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp4-550000000 {
+ opp-hz = /bits/ 64 <550000000>;
+ opp-microvolt = <1275000 1275000 1275000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp5-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1350000 1350000 1350000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp6-720000000 {
+ opp-hz = /bits/ 64 <720000000>;
+ opp-microvolt = <1350000 1350000 1350000>;
+ /* only high-speed grade omap3530 devices */
+ opp-supported-hw = <0xffffffff 2>;
+ turbo-mode;
+ };
+ };
+
ocp@68000000 {
omap3_pmx_core2: pinmux@480025d8 {
compatible = "ti,omap3-padconf", "pinctrl-single";
diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi
index 1e552f08f120..c618cb257d00 100644
--- a/arch/arm/boot/dts/omap36xx.dtsi
+++ b/arch/arm/boot/dts/omap36xx.dtsi
@@ -19,16 +19,65 @@
};
cpus {
- /* OMAP3630/OMAP37xx 'standard device' variants OPP50 to OPP130 */
+ /* OMAP3630/OMAP37xx variants OPP50 to OPP130 and OPP1G */
cpu: cpu@0 {
- operating-points = <
- /* kHz uV */
- 300000 1012500
- 600000 1200000
- 800000 1325000
- >;
- clock-latency = <300000>; /* From legacy driver */
+ operating-points-v2 = <&cpu0_opp_table>;
+
+ vbb-supply = <&abb_mpu_iva>;
+ clock-latency = <300000>; /* From omap-cpufreq driver */
+ };
+ };
+
+ /* see Documentation/devicetree/bindings/opp/opp.txt */
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+
+ opp50-300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ /*
+ * we currently only select the max voltage from table
+ * Table 4-19 of the DM3730 Data sheet (SPRS685B)
+ * Format is: cpu0-supply: <target min max>
+ * vbb-supply: <target min max>
+ */
+ opp-microvolt = <1012500 1012500 1012500>,
+ <1012500 1012500 1012500>;
+ /*
+ * first value is silicon revision bit mask
+ * second one is "speed binned" bit mask
+ */
+ opp-supported-hw = <0xffffffff 3>;
+ opp-suspend;
+ };
+
+ opp100-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1200000 1200000 1200000>,
+ <1200000 1200000 1200000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp130-800000000 {
+ opp-hz = /bits/ 64 <800000000>;
+ opp-microvolt = <1325000 1325000 1325000>,
+ <1325000 1325000 1325000>;
+ opp-supported-hw = <0xffffffff 3>;
};
+
+ opp1g-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <1375000 1375000 1375000>,
+ <1375000 1375000 1375000>;
+ /* only on am/dm37x with speed-binned bit set */
+ opp-supported-hw = <0xffffffff 2>;
+ turbo-mode;
+ };
+ };
+
+ opp_supply_mpu_iva: opp_supply {
+ compatible = "ti,omap-opp-supply";
+ ti,absolute-max-voltage-uv = <1375000>;
};
ocp@68000000 {
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 39a7d9393641..24dd5bbe60e4 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
*/
void imx6q_cpuidle_fec_irqs_used(void)
{
- imx6q_cpuidle_driver.states[1].disabled = true;
+ cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used);
void imx6q_cpuidle_fec_irqs_unused(void)
{
- imx6q_cpuidle_driver.states[1].disabled = false;
+ cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused);
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 2447427cb4a8..69f3fa270fbe 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void)
{
pr_info_once(
"Disabling cpuidle LP2 state, since PCIe IRQs are in use\n");
- tegra_idle_driver.states[1].disabled = true;
+ cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true);
}
int __init tegra20_cpuidle_init(void)
diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig
index ef3847e7aee0..e5b6cadbec85 100644
--- a/arch/mips/sgi-ip27/Kconfig
+++ b/arch/mips/sgi-ip27/Kconfig
@@ -38,10 +38,3 @@ config REPLICATE_KTEXT
Say Y here to enable replicating the kernel text across multiple
nodes in a NUMA cluster. This trades memory for speed.
-config REPLICATE_EXHANDLERS
- bool "Exception handler replication support"
- depends on SGI_IP27
- help
- Say Y here to enable replicating the kernel exception handlers
- across multiple nodes in a NUMA cluster. This trades memory for
- speed.
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 59d5375c9021..79a52c472782 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode)
hub_rtc_init(cnode);
-#ifdef CONFIG_REPLICATE_EXHANDLERS
- /*
- * If this is not a headless node initialization,
- * copy over the caliased exception handlers.
- */
- if (get_compact_nodeid() == cnode) {
- extern char except_vec2_generic, except_vec3_generic;
- extern void build_tlb_refill_handler(void);
-
- memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
- build_tlb_refill_handler();
- memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
- memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
+ if (nasid) {
+ /* copy exception handlers from first node to current node */
+ memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0),
+ (void *)CKSEG0, 0x200);
__flush_cache_all();
+ /* switch to node local exception handlers */
+ REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
}
-#endif
}
void per_cpu_init(void)
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index fb077a947575..8624a885d95b 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -332,11 +332,7 @@ static void __init mlreset(void)
* thinks it is a node 0 address.
*/
REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1));
-#ifdef CONFIG_REPLICATE_EXHANDLERS
- REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
-#else
REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0);
-#endif
#ifdef LATER
/*
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 324a23947585..997ffe46e953 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
-CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
-CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1
#This makes sure the $(obj) subdirectory exists even though vdso32/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6e1faa28c58..8ef85139553f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1940,6 +1940,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
If unsure, say y.
+choice
+ prompt "TSX enable mode"
+ depends on CPU_SUP_INTEL
+ default X86_INTEL_TSX_MODE_OFF
+ help
+ Intel's TSX (Transactional Synchronization Extensions) feature
+ allows locking protocols to be optimized through lock elision,
+ which can lead to a noticeable performance boost.
+
+ On the other hand, it has been shown that TSX can be exploited
+ to mount side channel attacks (e.g. TAA), and more such attacks
+ may well be discovered in the future.
+
+ Therefore TSX is not enabled by default (i.e. tsx=off). An admin
+ may override this decision with the tsx=on command line parameter.
+ Even with TSX enabled, the kernel will attempt to enable the best
+ possible TAA mitigation setting depending on the microcode available
+ for the particular machine.
+
+ This option sets the default TSX mode to one of tsx=on, =off or
+ =auto. See Documentation/admin-guide/kernel-parameters.txt for more
+ details.
+
+ Say off if not sure, auto if TSX is in use but should only be used
+ on safe platforms, or on if TSX is in use and its security aspect is
+ not relevant.
+
+config X86_INTEL_TSX_MODE_OFF
+ bool "off"
+ help
+ TSX is disabled if possible - equivalent to the tsx=off command line parameter.
+
+config X86_INTEL_TSX_MODE_ON
+ bool "on"
+ help
+ TSX is always enabled on TSX capable HW - equivalent to the tsx=on
+ command line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+ bool "auto"
+ help
+ TSX is enabled on TSX capable HW that is believed to be safe against
+ side channel attacks - equivalent to the tsx=auto command line parameter.
+endchoice
+
config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0652d3eed9bd..c4fbe379cc0b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -399,5 +399,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 24d6598dea29..4fc61483919a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -312,9 +312,12 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ struct list_head lpage_disallowed_link;
+
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
+ bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
@@ -859,6 +862,7 @@ struct kvm_arch {
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
+ struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
@@ -933,6 +937,7 @@ struct kvm_arch {
bool exception_payload_enabled;
struct kvm_pmu_event_filter *pmu_event_filter;
+ struct task_struct *nx_lpage_recovery_thread;
};
struct kvm_vm_stat {
@@ -946,6 +951,7 @@ struct kvm_vm_stat {
ulong mmu_unsync;
ulong remote_tlb_flush;
ulong lpages;
+ ulong nx_lpage_splits;
ulong max_mmu_page_hash_collisions;
};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 20ce682a2540..6a3124664289 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
+ * The processor is not susceptible to a
+ * machine check error due to modifying the
+ * code page size along with either the
+ * physical address or cache type
+ * without TLB invalidation.
+ */
+#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO BIT(8) /*
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
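
A hedged sketch of how these capability bits are typically consulted, mirroring the bugs.c logic further down in this series (the function below is made up for illustration):

  #include <linux/types.h>
  #include <asm/msr.h>

  static bool example_needs_tsx_ucode(u64 ia32_cap)       /* hypothetical */
  {
          /*
           * On MDS_NO=1 parts, VERW only clears the CPU buffers once the
           * microcode that also enumerates ARCH_CAP_TSX_CTRL_MSR is present.
           */
          return (ia32_cap & ARCH_CAP_MDS_NO) &&
                 !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
  }
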
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 80bc209c0708..5c24a7b35166 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
@@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6e0a3b43d027..54f5d54280f6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -988,4 +988,11 @@ enum mds_mitigations {
MDS_MITIGATION_VMWERV,
};
+enum taa_mitigations {
+ TAA_MITIGATION_OFF,
+ TAA_MITIGATION_UCODE_NEEDED,
+ TAA_MITIGATION_VERW,
+ TAA_MITIGATION_TSX_DISABLED,
+};
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d7a1e5a9331c..890f60083eca 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
ifdef CONFIG_CPU_SUP_INTEL
-obj-y += intel.o intel_pconfig.o
+obj-y += intel.o intel_pconfig.o tsx.o
obj-$(CONFIG_PM) += intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 91c2561b905f..4c7b0fa15a19 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
+static void __init taa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -105,6 +106,7 @@ void __init check_bugs(void)
ssb_select_mitigation();
l1tf_select_mitigation();
mds_select_mitigation();
+ taa_select_mitigation();
arch_smt_update();
@@ -269,6 +271,100 @@ static int __init mds_cmdline(char *str)
early_param("mds", mds_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "TAA: " fmt
+
+/* Default mitigation for TAA-affected CPUs */
+static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
+static bool taa_nosmt __ro_after_init;
+
+static const char * const taa_strings[] = {
+ [TAA_MITIGATION_OFF] = "Vulnerable",
+ [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+ [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled",
+};
+
+static void __init taa_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ return;
+ }
+
+ /* TSX previously disabled by tsx=off */
+ if (!boot_cpu_has(X86_FEATURE_RTM)) {
+ taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
+ goto out;
+ }
+
+ if (cpu_mitigations_off()) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ return;
+ }
+
+ /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
+ if (taa_mitigation == TAA_MITIGATION_OFF)
+ goto out;
+
+ if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ taa_mitigation = TAA_MITIGATION_VERW;
+ else
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+ /*
+ * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
+ * A microcode update fixes this behavior to clear CPU buffers. It also
+ * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
+ * ARCH_CAP_TSX_CTRL_MSR bit.
+ *
+ * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
+ * update is required.
+ */
+ ia32_cap = x86_read_arch_cap_msr();
+ if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
+ !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+ /*
+ * TSX is enabled, select alternate mitigation for TAA which is
+ * the same as MDS. Enable MDS static branch to clear CPU buffers.
+ *
+ * For guests that can't determine whether the correct microcode is
+ * present on host, enable the mitigation for UCODE_NEEDED as well.
+ */
+ static_branch_enable(&mds_user_clear);
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+
+out:
+ pr_info("%s\n", taa_strings[taa_mitigation]);
+}
+
+static int __init tsx_async_abort_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_TAA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -786,13 +882,10 @@ static void update_mds_branch_idle(void)
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
void cpu_bugs_smt_update(void)
{
- /* Enhanced IBRS implies STIBP. No update required. */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
- return;
-
mutex_lock(&spec_ctrl_mutex);
switch (spectre_v2_user) {
@@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (taa_mitigation) {
+ case TAA_MITIGATION_VERW:
+ case TAA_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(TAA_MSG_SMT);
+ break;
+ case TAA_MITIGATION_TSX_DISABLED:
+ case TAA_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void)
x86_amd_ssb_disable();
}
+bool itlb_multihit_kvm_mitigation;
+EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
+
#undef pr_fmt
#define pr_fmt(fmt) "L1TF: " fmt
@@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf)
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+ if (itlb_multihit_kvm_mitigation)
+ return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
+ else
+ return sprintf(buf, "KVM: Vulnerable\n");
+}
#else
static ssize_t l1tf_show_state(char *buf)
{
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
}
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+ return sprintf(buf, "Processor vulnerable\n");
+}
#endif
static ssize_t mds_show_state(char *buf)
@@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t tsx_async_abort_show_state(char *buf)
+{
+ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
+ (taa_mitigation == TAA_MITIGATION_OFF))
+ return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sprintf(buf, "%s; SMT Host state unknown\n",
+ taa_strings[taa_mitigation]);
+ }
+
+ return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MDS:
return mds_show_state(buf);
+ case X86_BUG_TAA:
+ return tsx_async_abort_show_state(buf);
+
+ case X86_BUG_ITLB_MULTIHIT:
+ return itlb_multihit_show_state(buf);
+
default:
break;
}
@@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
}
+
+ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+}
+
+ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9ae7d1bcd4f4..fffe21945374 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
-#define NO_SPECULATION BIT(0)
-#define NO_MELTDOWN BIT(1)
-#define NO_SSB BIT(2)
-#define NO_L1TF BIT(3)
-#define NO_MDS BIT(4)
-#define MSBDS_ONLY BIT(5)
-#define NO_SWAPGS BIT(6)
+#define NO_SPECULATION BIT(0)
+#define NO_MELTDOWN BIT(1)
+#define NO_SSB BIT(2)
+#define NO_L1TF BIT(3)
+#define NO_MDS BIT(4)
+#define MSBDS_ONLY BIT(5)
+#define NO_SWAPGS BIT(6)
+#define NO_ITLB_MULTIHIT BIT(7)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
- VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
- VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
- VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
-
- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+
+ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
/*
* Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
* good enough for our purposes.
*/
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
+
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
{}
};
@@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which)
return m && !!(m->driver_data & which);
}
-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+u64 x86_read_arch_cap_msr(void)
{
u64 ia32_cap = 0;
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ return ia32_cap;
+}
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
+ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
+ if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
+
if (cpu_matches(NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
-
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (!cpu_matches(NO_SWAPGS))
setup_force_cpu_bug(X86_BUG_SWAPGS);
+ /*
+ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
+ * - TSX is supported or
+ * - TSX_CTRL is present
+ *
+ * TSX_CTRL check is needed for cases when TSX could be disabled before
+ * the kernel boot e.g. kexec.
+	 * The TSX_CTRL check alone is not sufficient either, e.g. when the
+	 * microcode update is not present or when running as a guest that
+	 * does not get TSX_CTRL.
+ */
+ if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ (cpu_has(c, X86_FEATURE_RTM) ||
+ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ setup_force_cpu_bug(X86_BUG_TAA);
+
if (cpu_matches(NO_MELTDOWN))
return;
@@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void)
#endif
cpu_detect_tlb(&boot_cpu_data);
setup_cr_pinning();
+
+ tsx_init();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
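
Editorial note: the new checks in cpu_set_bug_bits() above key off MSR_IA32_ARCH_CAPABILITIES (0x10a). For illustration only, a userspace sketch that inspects the three bits this series cares about; it assumes root privileges, the msr driver (modprobe msr), and the bit layout from msr-index.h (PSCHANGE_MC_NO bit 6, TSX_CTRL bit 7, TAA_NO bit 8):

/*
 * Illustrative only: decode the ARCH_CAPABILITIES bits used by the
 * whitelist/bug checks above.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_ARCH_CAPABILITIES      0x0000010a
#define ARCH_CAP_PSCHANGE_MC_NO         (1ULL << 6)     /* bit positions per msr-index.h */
#define ARCH_CAP_TSX_CTRL_MSR           (1ULL << 7)
#define ARCH_CAP_TAA_NO                 (1ULL << 8)

int main(void)
{
        uint64_t cap = 0;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0 ||
            pread(fd, &cap, sizeof(cap), MSR_IA32_ARCH_CAPABILITIES) != sizeof(cap)) {
                perror("MSR_IA32_ARCH_CAPABILITIES");
                return 1;
        }
        printf("PSCHANGE_MC_NO=%d TSX_CTRL=%d TAA_NO=%d\n",
               !!(cap & ARCH_CAP_PSCHANGE_MC_NO),
               !!(cap & ARCH_CAP_TSX_CTRL_MSR),
               !!(cap & ARCH_CAP_TAA_NO));
        close(fd);
        return 0;
}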
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index c0e2407abdd6..38ab6e115eac 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -44,6 +44,22 @@ struct _tlb_table {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
+#ifdef CONFIG_CPU_SUP_INTEL
+enum tsx_ctrl_states {
+ TSX_CTRL_ENABLE,
+ TSX_CTRL_DISABLE,
+ TSX_CTRL_NOT_SUPPORTED,
+};
+
+extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
+
+extern void __init tsx_init(void);
+extern void tsx_enable(void);
+extern void tsx_disable(void);
+#else
+static inline void tsx_init(void) { }
+#endif /* CONFIG_CPU_SUP_INTEL */
+
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
@@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
+extern u64 x86_read_arch_cap_msr(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c2fdc00df163..11d5c5950e2d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c)
detect_tme(c);
init_intel_misc_features(c);
+
+ if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+ tsx_enable();
+ if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+ tsx_disable();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a46dee8e78db..2e3b06d6bbc6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
}
rdtgrp = rdtgroup_kn_lock_live(of->kn);
- rdt_last_cmd_clear();
if (!rdtgrp) {
ret = -ENOENT;
- rdt_last_cmd_puts("Directory was removed\n");
goto unlock;
}
@@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
int ret;
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
- rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
- rdt_last_cmd_puts("Directory was removed\n");
goto out_unlock;
}
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
new file mode 100644
index 000000000000..3e20d322bc98
--- /dev/null
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Transactional Synchronization Extensions (TSX) control.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ * Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+ */
+
+#include <linux/cpufeature.h>
+
+#include <asm/cmdline.h>
+
+#include "cpu.h"
+
+enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
+
+void tsx_disable(void)
+{
+ u64 tsx;
+
+ rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+ /* Force all transactions to immediately abort */
+ tsx |= TSX_CTRL_RTM_DISABLE;
+
+ /*
+ * Ensure TSX support is not enumerated in CPUID.
+ * This is visible to userspace and will ensure they
+ * do not waste resources trying TSX transactions that
+ * will always abort.
+ */
+ tsx |= TSX_CTRL_CPUID_CLEAR;
+
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+void tsx_enable(void)
+{
+ u64 tsx;
+
+ rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+ /* Enable the RTM feature in the cpu */
+ tsx &= ~TSX_CTRL_RTM_DISABLE;
+
+ /*
+ * Ensure TSX support is enumerated in CPUID.
+ * This is visible to userspace and will ensure they
+ * can enumerate and use the TSX feature.
+ */
+ tsx &= ~TSX_CTRL_CPUID_CLEAR;
+
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+static bool __init tsx_ctrl_is_supported(void)
+{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+ *
+ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+ * tsx= cmdline requests will do nothing on CPUs without
+ * MSR_IA32_TSX_CTRL support.
+ */
+ return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+}
+
+static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+{
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ return TSX_CTRL_DISABLE;
+
+ return TSX_CTRL_ENABLE;
+}
+
+void __init tsx_init(void)
+{
+ char arg[5] = {};
+ int ret;
+
+ if (!tsx_ctrl_is_supported())
+ return;
+
+ ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
+ if (ret >= 0) {
+ if (!strcmp(arg, "on")) {
+ tsx_ctrl_state = TSX_CTRL_ENABLE;
+ } else if (!strcmp(arg, "off")) {
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ } else if (!strcmp(arg, "auto")) {
+ tsx_ctrl_state = x86_get_tsx_auto_mode();
+ } else {
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ pr_err("tsx: invalid option, defaulting to off\n");
+ }
+ } else {
+ /* tsx= not provided */
+ if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
+ tsx_ctrl_state = x86_get_tsx_auto_mode();
+ else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ else
+ tsx_ctrl_state = TSX_CTRL_ENABLE;
+ }
+
+ if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
+ tsx_disable();
+
+ /*
+ * tsx_disable() will change the state of the
+ * RTM CPUID bit. Clear it here since it is now
+ * expected to be not set.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RTM);
+ } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
+
+ /*
+ * HW defaults TSX to be enabled at bootup.
+ * We may still need the TSX enable support
+ * during init for special cases like
+ * kexec after TSX is disabled.
+ */
+ tsx_enable();
+
+ /*
+ * tsx_enable() will change the state of the
+ * RTM CPUID bit. Force it here since it is now
+ * expected to be set.
+ */
+ setup_force_cpu_cap(X86_FEATURE_RTM);
+ }
+}
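
Editorial note: tsx_init() above consumes the new tsx= command-line option (on/off/auto, documented in the kernel-parameters.txt change in this series). Whether tsx_disable()'s TSX_CTRL_CPUID_CLEAR took effect can be double-checked from userspace via CPUID leaf 7; a small sketch, assuming the usual RTM enumeration in CPUID.(EAX=7,ECX=0):EBX bit 11:

/* Illustrative only: is RTM still enumerated after booting with tsx=off? */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                return 1;
        printf("RTM enumerated: %s\n", (ebx & (1u << 11)) ? "yes" : "no");
        return 0;
}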
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6f6b1d04dadf..4cba91ec8049 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x3ec4,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24c23c66b226..2ce9da58611e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/kern_levels.h>
+#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/pat.h>
@@ -47,6 +48,35 @@
#include <asm/kvm_page_track.h>
#include "trace.h"
+extern bool itlb_multihit_kvm_mitigation;
+
+static int __read_mostly nx_huge_pages = -1;
+#ifdef CONFIG_PREEMPT_RT
+/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */
+static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
+#else
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+#endif
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+
+static struct kernel_param_ops nx_huge_pages_ops = {
+ .set = set_nx_huge_pages,
+ .get = param_get_bool,
+};
+
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+ .set = set_nx_huge_pages_recovery_ratio,
+ .get = param_get_uint,
+};
+
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+ &nx_huge_pages_recovery_ratio, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+
/*
* When setting this variable to true it enables Two-Dimensional-Paging
* where the hardware walks 2 page tables:
@@ -352,6 +382,11 @@ static inline bool spte_ad_need_write_protect(u64 spte)
return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
}
+static bool is_nx_huge_page_enabled(void)
+{
+ return READ_ONCE(nx_huge_pages);
+}
+
static inline u64 spte_shadow_accessed_mask(u64 spte)
{
MMU_WARN_ON(is_mmio_spte(spte));
@@ -1190,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ if (sp->lpage_disallowed)
+ return;
+
+ ++kvm->stat.nx_lpage_splits;
+ list_add_tail(&sp->lpage_disallowed_link,
+ &kvm->arch.lpage_disallowed_mmu_pages);
+ sp->lpage_disallowed = true;
+}
+
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
@@ -1207,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ --kvm->stat.nx_lpage_splits;
+ sp->lpage_disallowed = false;
+ list_del(&sp->lpage_disallowed_link);
+}
+
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@@ -2792,6 +2845,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
kvm_reload_remote_mmus(kvm);
}
+ if (sp->lpage_disallowed)
+ unaccount_huge_nx_page(kvm, sp);
+
sp->role.invalid = 1;
return list_unstable;
}
@@ -3013,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (!speculative)
spte |= spte_shadow_accessed_mask(spte);
+ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
+ is_nx_huge_page_enabled()) {
+ pte_access &= ~ACC_EXEC_MASK;
+ }
+
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
else
@@ -3233,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
+static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
+ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
+{
+ int level = *levelp;
+ u64 spte = *it.sptep;
+
+ if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
+ is_nx_huge_page_enabled() &&
+ is_shadow_present_pte(spte) &&
+ !is_large_pte(spte)) {
+ /*
+ * A small SPTE exists for this pfn, but FNAME(fetch)
+ * and __direct_map would like to create a large PTE
+ * instead: just force them to go down another level,
+		 * patching the next 9 bits of the address back into
+		 * the pfn for them.
+ */
+ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
+ *pfnp |= gfn & page_mask;
+ (*levelp)--;
+ }
+}
+
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
int map_writable, int level, kvm_pfn_t pfn,
- bool prefault)
+ bool prefault, bool lpage_disallowed)
{
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
@@ -3248,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
+ /*
+ * We cannot overwrite existing page tables with an NX
+ * large page, as the leaf could be executable.
+ */
+ disallowed_hugepage_adjust(it, gfn, &pfn, &level);
+
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == level)
break;
@@ -3258,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
it.level - 1, true, ACC_ALL);
link_shadow_page(vcpu, it.sptep, sp);
+ if (lpage_disallowed)
+ account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -3306,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -3550,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
{
int r;
int level;
- bool force_pt_level = false;
+ bool force_pt_level;
kvm_pfn_t pfn;
unsigned long mmu_seq;
bool map_writable, write = error_code & PFERR_WRITE_MASK;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
+ force_pt_level = lpage_disallowed;
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
/*
@@ -3588,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn,
+ prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
@@ -4174,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
@@ -4184,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (r)
return r;
- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
- PT_DIRECTORY_LEVEL);
+ force_pt_level =
+ lpage_disallowed ||
+ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
if (level > PT_DIRECTORY_LEVEL &&
@@ -4214,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
+ prefault, lpage_disallowed);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
@@ -5914,9 +6014,9 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
pte_list_remove(rmap_head, sptep);
if (kvm_available_flush_tlb_with_range())
@@ -6155,10 +6255,59 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
+static bool get_nx_auto_mode(void)
+{
+ /* Return true when CPU has the bug, and mitigations are ON */
+ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
+}
+
+static void __set_nx_huge_pages(bool val)
+{
+ nx_huge_pages = itlb_multihit_kvm_mitigation = val;
+}
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+{
+ bool old_val = nx_huge_pages;
+ bool new_val;
+
+ /* In "auto" mode deploy workaround only if CPU has the bug. */
+ if (sysfs_streq(val, "off"))
+ new_val = 0;
+ else if (sysfs_streq(val, "force"))
+ new_val = 1;
+ else if (sysfs_streq(val, "auto"))
+ new_val = get_nx_auto_mode();
+ else if (strtobool(val, &new_val) < 0)
+ return -EINVAL;
+
+ __set_nx_huge_pages(new_val);
+
+ if (new_val != old_val) {
+ struct kvm *kvm;
+
+ mutex_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ mutex_lock(&kvm->slots_lock);
+ kvm_mmu_zap_all_fast(kvm);
+ mutex_unlock(&kvm->slots_lock);
+
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+ }
+ mutex_unlock(&kvm_lock);
+ }
+
+ return 0;
+}
+
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
+ if (nx_huge_pages == -1)
+ __set_nx_huge_pages(get_nx_auto_mode());
+
/*
* MMU roles use union aliasing which is, generally speaking, an
* undefined behavior. However, we supposedly know how compilers behave
@@ -6238,3 +6387,116 @@ void kvm_mmu_module_exit(void)
unregister_shrinker(&mmu_shrinker);
mmu_audit_disable();
}
+
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
+{
+ unsigned int old_val;
+ int err;
+
+ old_val = nx_huge_pages_recovery_ratio;
+ err = param_set_uint(val, kp);
+ if (err)
+ return err;
+
+ if (READ_ONCE(nx_huge_pages) &&
+ !old_val && nx_huge_pages_recovery_ratio) {
+ struct kvm *kvm;
+
+ mutex_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+
+ mutex_unlock(&kvm_lock);
+ }
+
+ return err;
+}
+
+static void kvm_recover_nx_lpages(struct kvm *kvm)
+{
+ int rcu_idx;
+ struct kvm_mmu_page *sp;
+ unsigned int ratio;
+ LIST_HEAD(invalid_list);
+ ulong to_zap;
+
+ rcu_idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+ to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
+ while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+ /*
+ * We use a separate list instead of just using active_mmu_pages
+ * because the number of lpage_disallowed pages is expected to
+ * be relatively small compared to the total.
+ */
+ sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+ struct kvm_mmu_page,
+ lpage_disallowed_link);
+ WARN_ON_ONCE(!sp->lpage_disallowed);
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ WARN_ON_ONCE(sp->lpage_disallowed);
+
+ if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ if (to_zap)
+ cond_resched_lock(&kvm->mmu_lock);
+ }
+ }
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, rcu_idx);
+}
+
+static long get_nx_lpage_recovery_timeout(u64 start_time)
+{
+ return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+ ? start_time + 60 * HZ - get_jiffies_64()
+ : MAX_SCHEDULE_TIMEOUT;
+}
+
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+{
+ u64 start_time;
+ long remaining_time;
+
+ while (true) {
+ start_time = get_jiffies_64();
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop() && remaining_time > 0) {
+ schedule_timeout(remaining_time);
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ set_current_state(TASK_RUNNING);
+
+ if (kthread_should_stop())
+ return 0;
+
+ kvm_recover_nx_lpages(kvm);
+ }
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+ int err;
+
+ err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+ "kvm-nx-lpage-recovery",
+ &kvm->arch.nx_lpage_recovery_thread);
+ if (!err)
+ kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+
+ return err;
+}
+
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
+{
+ if (kvm->arch.nx_lpage_recovery_thread)
+ kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+}
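
Editorial note: since both parameters above are registered with mode 0644, they should appear as /sys/module/kvm/parameters/nx_huge_pages and .../nx_huge_pages_recovery_ratio once kvm is loaded; set_nx_huge_pages() accepts "off", "force", "auto" or a plain boolean, and kvm_recover_nx_lpages() then zaps DIV_ROUND_UP(nx_lpage_splits, ratio) entries roughly once a minute. A hypothetical root-only helper for flipping them at runtime (the paths follow the usual module_param convention, everything else is illustrative):

/* Illustrative only: flip the KVM NX huge page workaround at runtime (as root). */
#include <stdio.h>

static int write_param(const char *name, const char *val)
{
        char path[128];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/module/kvm/parameters/%s", name);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fputs(val, f);
        return fclose(f);
}

int main(void)
{
        /* "auto" re-evaluates get_nx_auto_mode(); "force" enables it unconditionally. */
        if (write_param("nx_huge_pages", "auto"))
                perror("nx_huge_pages");
        /* Zap 1/60th (rounded up) of the disallowed huge pages per recovery pass. */
        if (write_param("nx_huge_pages_recovery_ratio", "60"))
                perror("nx_huge_pages_recovery_ratio");
        return 0;
}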
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 11f8ec89433b..d55674f44a18 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+
+int kvm_mmu_post_init_vm(struct kvm *kvm);
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
+
#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7d5cdb3af594..97b21e7fd013 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
- kvm_pfn_t pfn, bool map_writable, bool prefault)
+ kvm_pfn_t pfn, bool map_writable, bool prefault,
+ bool lpage_disallowed)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
- gfn_t base_gfn;
+ gfn_t gfn, base_gfn;
direct_access = gw->pte_access;
@@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- base_gfn = gw->gfn;
+ /*
+ * FNAME(page_fault) might have clobbered the bottom bits of
+ * gw->gfn, restore them from the virtual address.
+ */
+ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
+ base_gfn = gfn;
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+
+ /*
+ * We cannot overwrite existing page tables with an NX
+ * large page, as the leaf could be executable.
+ */
+ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == hlevel)
break;
@@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
+ if (lpage_disallowed)
+ account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int r;
kvm_pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
- bool force_pt_level = false;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
+ bool force_pt_level = lpage_disallowed;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
- level, pfn, map_writable, prefault);
+ level, pfn, map_writable, prefault, lpage_disallowed);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
out_unlock:
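
Editorial note: the gfn reconstruction above and disallowed_hugepage_adjust()'s page_mask are the same 9-bits-per-level arithmetic; a worked example (editorial, assuming 4KiB base pages so KVM_PAGES_PER_HPAGE(1) == 1 and KVM_PAGES_PER_HPAGE(2) == 512):

/*
 * Worked example (editorial): addr = 0x40312000 faults on a 2MiB guest
 * mapping, so gw->level == 2 and gw->gfn may have been clobbered to the
 * 2MiB-aligned value 0x40200.
 *
 *   PT_LVL_OFFSET_MASK(2) covers address bits 20:12, so
 *       (addr & PT_LVL_OFFSET_MASK(2)) >> PAGE_SHIFT == 0x112
 *   and FNAME(fetch) restores gfn = 0x40200 | 0x112 = 0x40312.
 *
 * If a 4KiB shadow entry already exists where the 2MiB SPTE would go,
 * disallowed_hugepage_adjust() computes
 *       page_mask = KVM_PAGES_PER_HPAGE(2) - KVM_PAGES_PER_HPAGE(1) = 511
 * and patches the same nine low bits of the gfn into the pfn before
 * dropping the walk down to the 4KiB level.
 */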
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5d21a4ab28cf..04a8212704c1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
return;
+ /*
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+ * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+	 * matches PI.NDST. Otherwise, a vCPU may not be woken up
+	 * correctly.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
+ goto after_clear_sn;
+ }
+
/* The full case. */
do {
old.control = new.control = pi_desc->control;
@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
} while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control);
+after_clear_sn:
+
/*
* Clear SN before reading the bitmap. The VT-d firmware
* writes the bitmap and reads SN atomically (5.2.3 in the
@@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
*/
smp_mb__after_atomic();
- if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+ if (!pi_is_pir_empty(pi_desc))
pi_set_on(pi_desc);
}
@@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
if (pi_test_on(&vmx->pi_desc)) {
pi_clear_on(&vmx->pi_desc);
/*
- * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * IOMMU can write to PID.ON, so the barrier matters even on UP.
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
@@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
- return pi_test_on(vcpu_to_pi_desc(vcpu));
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ return pi_test_on(pi_desc) ||
+ (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
}
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bee16687dc0b..5a0f34b1e226 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
static inline void pi_set_sn(struct pi_desc *pi_desc)
{
set_bit(POSTED_INTR_SN,
@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff395f812719..5d530521f11d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages, .mode = 0444) },
+ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
{ "max_mmu_page_hash_collisions",
VM_STAT(max_mmu_page_hash_collisions) },
{ NULL }
@@ -1132,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
*
- * This list is modified at module load time to reflect the
+	 * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
+ * extract the supported MSRs from the related const lists.
+ * msrs_to_save is selected from the msrs_to_save_all to reflect the
* capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
* may depend on host virtualization features rather than host cpu features.
*/
-static u32 msrs_to_save[] = {
+static const u32 msrs_to_save_all[] = {
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -1179,9 +1182,10 @@ static u32 msrs_to_save[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
};
+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
static unsigned num_msrs_to_save;
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs_all[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@ -1220,7 +1224,7 @@ static u32 emulated_msrs[] = {
* by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
* We always support the "true" VMX control MSRs, even if the host
* processor does not, so I am putting these registers here rather
- * than in msrs_to_save.
+ * than in msrs_to_save_all.
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@ -1239,13 +1243,14 @@ static u32 emulated_msrs[] = {
MSR_KVM_POLL_CONTROL,
};
+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
* List of msr numbers which are used to expose MSR-based features that
* can be used by a hypervisor to validate requested CPU features.
*/
-static u32 msr_based_features[] = {
+static const u32 msr_based_features_all[] = {
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_PINBASED_CTLS,
@@ -1270,6 +1275,7 @@ static u32 msr_based_features[] = {
MSR_IA32_ARCH_CAPABILITIES,
};
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static unsigned int num_msr_based_features;
static u64 kvm_get_arch_capabilities(void)
@@ -1280,6 +1286,14 @@ static u64 kvm_get_arch_capabilities(void)
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
/*
+ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+ * the nested hypervisor runs with NX huge pages. If it is not,
+	 * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
+ * L1 guests, so it need not worry about its own (L2) guests.
+ */
+ data |= ARCH_CAP_PSCHANGE_MC_NO;
+
+ /*
* If we're doing cache flushes (either "always" or "cond")
* we will do one whenever the guest does a vmlaunch/vmresume.
* If an outer hypervisor is doing the cache flush for us
@@ -1298,6 +1312,25 @@ static u64 kvm_get_arch_capabilities(void)
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
+ /*
+ * On TAA affected systems, export MDS_NO=0 when:
+ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+ * - Updated microcode is present. This is detected by
+ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+ * that VERW clears CPU buffers.
+ *
+ * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+ * mitigation and don't complain:
+ *
+ * "Vulnerable: Clear CPU buffers attempted, no microcode"
+ *
+ * If TSX is disabled on the system, guests are also mitigated against
+ * TAA and clear CPU buffer mitigation is not required for guests.
+ */
+ if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+ (data & ARCH_CAP_TSX_CTRL_MSR))
+ data &= ~ARCH_CAP_MDS_NO;
+
return data;
}
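
Editorial summary (not part of the patch) of the TAA branch just added to kvm_get_arch_capabilities():

/*
 *   host X86_BUG_TAA  host RTM  host TSX_CTRL_MSR  guest MDS_NO bit
 *   ----------------  --------  -----------------  ------------------------------
 *   no                any       any                left as computed above
 *   yes               0         any                left as computed (TSX is off,
 *                                                  guest is not affected by TAA)
 *   yes               1         0                  left as computed (no updated
 *                                                  microcode, VERW not guaranteed
 *                                                  to clear CPU buffers)
 *   yes               1         1                  forced to 0 so the guest
 *                                                  deploys the VERW mitigation
 */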
@@ -5090,22 +5123,26 @@ static void kvm_init_msr_list(void)
{
struct x86_pmu_capability x86_pmu;
u32 dummy[2];
- unsigned i, j;
+ unsigned i;
BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
- "Please update the fixed PMCs in msrs_to_save[]");
+			 "Please update the fixed PMCs in msrs_to_save_all[]");
perf_get_x86_pmu_capability(&x86_pmu);
- for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
- if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+ num_msrs_to_save = 0;
+ num_emulated_msrs = 0;
+ num_msr_based_features = 0;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+ if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
continue;
/*
* Even MSRs that are valid in the host may not be exposed
* to the guests in some cases.
*/
- switch (msrs_to_save[i]) {
+ switch (msrs_to_save_all[i]) {
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported())
continue;
@@ -5133,17 +5170,17 @@ static void kvm_init_msr_list(void)
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
if (!kvm_x86_ops->pt_supported() ||
- msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+ msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
- if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
break;
case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
- if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
}
@@ -5151,34 +5188,25 @@ static void kvm_init_msr_list(void)
break;
}
- if (j < i)
- msrs_to_save[j] = msrs_to_save[i];
- j++;
+ msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
}
- num_msrs_to_save = j;
- for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
continue;
- if (j < i)
- emulated_msrs[j] = emulated_msrs[i];
- j++;
+ emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
}
- num_emulated_msrs = j;
- for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
struct kvm_msr_entry msr;
- msr.index = msr_based_features[i];
+ msr.index = msr_based_features_all[i];
if (kvm_get_msr_feature(&msr))
continue;
- if (j < i)
- msr_based_features[j] = msr_based_features[i];
- j++;
+ msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
}
- num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -9428,6 +9456,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
+ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -9456,6 +9485,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return kvm_x86_ops->vm_init(kvm);
}
+int kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return kvm_mmu_post_init_vm(kvm);
+}
+
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
@@ -9557,6 +9591,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
EXPORT_SYMBOL_GPL(x86_set_memory_region);
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+ kvm_mmu_pre_destroy_vm(kvm);
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0319d6339822..0c6214497fcc 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
}
+
+static
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+ /*
+ * To prevent bfqq's service guarantees from being violated,
+ * bfqq may be left busy, i.e., queued for service, even if
+ * empty (see comments in __bfq_bfqq_expire() for
+ * details). But, if no process will send requests to bfqq any
+ * longer, then there is no point in keeping bfqq queued for
+ * service. In addition, keeping bfqq queued for service, but
+ * with no process ref any longer, may have caused bfqq to be
+ * freed when dequeued from service. But this is assumed to
+ * never happen.
+ */
+ if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
+ bfqq != bfqd->in_service_queue)
+ bfq_del_bfqq_busy(bfqd, bfqq, false);
+
+ bfq_put_queue(bfqq);
+}
+
static void
bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
@@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
*/
new_bfqq->pid = -1;
bfqq->bic = NULL;
- /* release process reference to bfqq */
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqd, bfqq);
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_cooperator(bfqq);
- bfq_put_queue(bfqq); /* release process reference */
+ bfq_release_process_ref(bfqd, bfqq);
}
static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bfqq = bic_to_bfqq(bic, false);
if (bfqq) {
- /* release process reference on this queue */
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqd, bfqq);
bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
bic_set_bfqq(bic, bfqq, false);
}
@@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
bfq_put_cooperator(bfqq);
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqq->bfqd, bfqq);
return NULL;
}
diff --git a/block/bio.c b/block/bio.c
index 8f0ed6228fc5..b1170ec18464 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
- if (bio->bi_vcnt > 0) {
+ if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a7ed434eae03..e01267f99183 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
atomic64_set(&iocg->active_period, cur_period);
/* already activated or breaking leaf-only constraint? */
- for (i = iocg->level; i > 0; i--)
- if (!list_empty(&iocg->active_list))
+ if (!list_empty(&iocg->active_list))
+ goto succeed_unlock;
+ for (i = iocg->level - 1; i > 0; i--)
+ if (!list_empty(&iocg->ancestors[i]->active_list))
goto fail_unlock;
+
if (iocg->child_active_sum)
goto fail_unlock;
@@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
ioc_start_period(ioc, now);
}
+succeed_unlock:
spin_unlock_irq(&ioc->lock);
return true;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..2ae95df2e74f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
return bm_status;
}
+static void wait_for_freeze(void)
+{
+#ifdef CONFIG_X86
+ /* No delay is needed if we are in guest */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
+#endif
+ /* Dummy wait op - must do something useless after P_LVL2 read
+ because chipsets cannot guarantee that STPCLK# signal
+ gets asserted in time to freeze execution properly. */
+ inl(acpi_gbl_FADT.xpm_timer_block.address);
+}
+
/**
* acpi_idle_do_entry - enter idle state using the appropriate method
* @cx: cstate data
@@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
} else {
/* IO port based C-state */
inb(cx->address);
- /* Dummy wait op - must do something useless after P_LVL2 read
- because chipsets cannot guarantee that STPCLK# signal
- gets asserted in time to freeze execution properly. */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ wait_for_freeze();
}
}
@@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
inb(cx->address);
- /* See comment in acpi_idle_do_entry() */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ wait_for_freeze();
} else
return -ENODEV;
}
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index cc37511de866..6265871a4af2 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -554,12 +554,27 @@ ssize_t __weak cpu_show_mds(struct device *dev,
return sprintf(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -568,6 +583,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_spec_store_bypass.attr,
&dev_attr_l1tf.attr,
&dev_attr_mds.attr,
+ &dev_attr_tsx_async_abort.attr,
+ &dev_attr_itlb_multihit.attr,
NULL
};
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 55907c27075b..84c4e1f72cbd 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -872,3 +872,39 @@ int walk_memory_blocks(unsigned long start, unsigned long size,
}
return ret;
}
+
+struct for_each_memory_block_cb_data {
+ walk_memory_blocks_func_t func;
+ void *arg;
+};
+
+static int for_each_memory_block_cb(struct device *dev, void *data)
+{
+ struct memory_block *mem = to_memory_block(dev);
+ struct for_each_memory_block_cb_data *cb_data = data;
+
+ return cb_data->func(mem, cb_data->arg);
+}
+
+/**
+ * for_each_memory_block - walk through all present memory blocks
+ *
+ * @arg: argument passed to func
+ * @func: callback for each memory block walked
+ *
+ * This function walks through all present memory blocks, calling func on
+ * each memory block.
+ *
+ * In case func() returns an error, walking is aborted and the error is
+ * returned.
+ */
+int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
+{
+ struct for_each_memory_block_cb_data cb_data = {
+ .func = func,
+ .arg = arg,
+ };
+
+ return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
+ for_each_memory_block_cb);
+}
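
Editorial note: a hypothetical in-kernel caller of the new walker, just to show the callback shape; it assumes the declaration of for_each_memory_block() lands in <linux/memory.h> alongside walk_memory_blocks():

/* Hypothetical in-kernel user: count the present memory blocks. */
#include <linux/memory.h>

static int count_block(struct memory_block *mem, void *arg)
{
        unsigned int *count = arg;

        (*count)++;
        return 0;               /* a non-zero return aborts the walk and is propagated */
}

static unsigned int count_memory_blocks(void)
{
        unsigned int count = 0;

        for_each_memory_block(&count, count_block);
        return count;
}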
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 8db98a1f83dc..bbddb267c2e6 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -188,6 +188,26 @@ void dev_pm_domain_detach(struct device *dev, bool power_off)
EXPORT_SYMBOL_GPL(dev_pm_domain_detach);
/**
+ * dev_pm_domain_start - Start the device through its PM domain.
+ * @dev: Device to start.
+ *
+ * This function should typically be called during probe by a subsystem/driver,
+ * when it needs to start its device from the PM domain's perspective. Note
+ * that it is assumed that the PM domain is already powered on when this
+ * function is called.
+ *
+ * Returns 0 on success and negative error values on failures.
+ */
+int dev_pm_domain_start(struct device *dev)
+{
+ if (dev->pm_domain && dev->pm_domain->start)
+ return dev->pm_domain->start(dev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_start);
+
+/**
* dev_pm_domain_set - Set PM domain of a device.
* @dev: Device whose PM domain is to be set.
* @pd: PM domain to be set, or NULL.
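
Editorial note: a hypothetical probe path using the new helper (driver and device names are made up; this assumes the declaration is exported through <linux/pm_domain.h>, and the genpd hook for it is wired up in the next hunk):

/* Hypothetical consumer: start the device from its PM domain during probe. */
#include <linux/platform_device.h>
#include <linux/pm_domain.h>

static int foo_probe(struct platform_device *pdev)
{
        int ret;

        /* The PM domain is assumed to be powered on already at this point. */
        ret = dev_pm_domain_start(&pdev->dev);
        if (ret)
                return ret;

        /* normal probe work would follow */
        return 0;
}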
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index cc85e87eaf05..8e5725b11ee8 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -634,6 +634,13 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
return ret;
}
+static int genpd_dev_pm_start(struct device *dev)
+{
+ struct generic_pm_domain *genpd = dev_to_genpd(dev);
+
+ return genpd_start_dev(genpd, dev);
+}
+
static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
unsigned long val, void *ptr)
{
@@ -922,24 +929,6 @@ static int __init genpd_power_off_unused(void)
}
late_initcall(genpd_power_off_unused);
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_GENERIC_DOMAINS_OF)
-
-static bool genpd_present(const struct generic_pm_domain *genpd)
-{
- const struct generic_pm_domain *gpd;
-
- if (IS_ERR_OR_NULL(genpd))
- return false;
-
- list_for_each_entry(gpd, &gpd_list, gpd_list_node)
- if (gpd == genpd)
- return true;
-
- return false;
-}
-
-#endif
-
#ifdef CONFIG_PM_SLEEP
/**
@@ -1354,8 +1343,8 @@ static void genpd_syscore_switch(struct device *dev, bool suspend)
{
struct generic_pm_domain *genpd;
- genpd = dev_to_genpd(dev);
- if (!genpd_present(genpd))
+ genpd = dev_to_genpd_safe(dev);
+ if (!genpd)
return;
if (suspend) {
@@ -1805,6 +1794,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
genpd->domain.ops.poweroff_noirq = genpd_poweroff_noirq;
genpd->domain.ops.restore_noirq = genpd_restore_noirq;
genpd->domain.ops.complete = genpd_complete;
+ genpd->domain.start = genpd_dev_pm_start;
if (genpd->flags & GENPD_FLAG_PM_CLK) {
genpd->dev_ops.stop = pm_clk_suspend;
@@ -2020,6 +2010,16 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
return 0;
}
+static bool genpd_present(const struct generic_pm_domain *genpd)
+{
+ const struct generic_pm_domain *gpd;
+
+ list_for_each_entry(gpd, &gpd_list, gpd_list_node)
+ if (gpd == genpd)
+ return true;
+ return false;
+}
+
/**
* of_genpd_add_provider_simple() - Register a simple PM domain provider
* @np: Device node pointer associated with the PM domain provider.
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 39a06a0cfdaa..444f5c169a0b 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -117,6 +117,13 @@ static inline bool device_pm_initialized(struct device *dev)
return dev->power.in_dpm_list;
}
+/* drivers/base/power/wakeup_stats.c */
+extern int wakeup_source_sysfs_add(struct device *parent,
+ struct wakeup_source *ws);
+extern void wakeup_source_sysfs_remove(struct wakeup_source *ws);
+
+extern int pm_wakeup_source_sysfs_add(struct device *parent);
+
#else /* !CONFIG_PM_SLEEP */
static inline void device_pm_sleep_init(struct device *dev) {}
@@ -141,6 +148,11 @@ static inline bool device_pm_initialized(struct device *dev)
return device_is_registered(dev);
}
+static inline int pm_wakeup_source_sysfs_add(struct device *parent)
+{
+ return 0;
+}
+
#endif /* !CONFIG_PM_SLEEP */
static inline void device_pm_init(struct device *dev)
@@ -149,21 +161,3 @@ static inline void device_pm_init(struct device *dev)
device_pm_sleep_init(dev);
pm_runtime_init(dev);
}
-
-#ifdef CONFIG_PM_SLEEP
-
-/* drivers/base/power/wakeup_stats.c */
-extern int wakeup_source_sysfs_add(struct device *parent,
- struct wakeup_source *ws);
-extern void wakeup_source_sysfs_remove(struct wakeup_source *ws);
-
-extern int pm_wakeup_source_sysfs_add(struct device *parent);
-
-#else /* !CONFIG_PM_SLEEP */
-
-static inline int pm_wakeup_source_sysfs_add(struct device *parent)
-{
- return 0;
-}
-
-#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 5ce77d1ef9fc..8e021082dba8 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -272,7 +272,7 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
{
struct wake_irq *wirq = dev->power.wakeirq;
- if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK)))
+ if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
return;
if (likely(wirq->status & WAKE_IRQ_DEDICATED_MANAGED)) {
@@ -299,7 +299,7 @@ void dev_pm_disable_wake_irq_check(struct device *dev)
{
struct wake_irq *wirq = dev->power.wakeirq;
- if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK)))
+ if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
return;
if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 39136675dae5..13527a0b4e44 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2087,7 +2087,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_data *osd_data;
u64 objno;
- u8 state, new_state, current_state;
+ u8 state, new_state, uninitialized_var(current_state);
bool has_current_state;
void *p;
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 76b73ddf8fd7..10f6368117d8 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
cancel_work_sync(&card->event_work);
+ destroy_workqueue(card->event_wq);
rsxx_destroy_dev(card);
rsxx_dma_destroy(card);
+ destroy_workqueue(card->creg_ctrl.creg_wq);
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 80b850ef1bf6..8d53b8ef545c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -13,7 +13,6 @@
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
-#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/hw_random.h>
#include <linux/kernel.h>
@@ -422,9 +421,7 @@ static int hwrng_fillfn(void *unused)
{
long rc;
- set_freezable();
-
- while (!kthread_freezable_should_stop(NULL)) {
+ while (!kthread_should_stop()) {
struct hwrng *rng;
rng = get_current_rng();
diff --git a/drivers/char/random.c b/drivers/char/random.c
index de434feb873a..01b8868b9bed 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -327,7 +327,6 @@
#include <linux/percpu.h>
#include <linux/cryptohash.h>
#include <linux/fips.h>
-#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/workqueue.h>
#include <linux/irq.h>
@@ -2500,8 +2499,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_freezable(random_write_wait,
- kthread_should_stop() ||
+ wait_event_interruptible(random_write_wait, kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a905796f7f85..3858d86cf409 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -49,14 +49,6 @@ config ARM_ARMADA_8K_CPUFREQ
If in doubt, say N.
-# big LITTLE core layer and glue drivers
-config ARM_BIG_LITTLE_CPUFREQ
- tristate "Generic ARM big LITTLE CPUfreq driver"
- depends on ARM_CPU_TOPOLOGY && HAVE_CLK
- select PM_OPP
- help
- This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
-
config ARM_SCPI_CPUFREQ
tristate "SCPI based CPUfreq driver"
depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
@@ -69,7 +61,9 @@ config ARM_SCPI_CPUFREQ
config ARM_VEXPRESS_SPC_CPUFREQ
tristate "Versatile Express SPC based CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC
+ depends on ARM_CPU_TOPOLOGY && HAVE_CLK
+ depends on ARCH_VEXPRESS_SPC
+ select PM_OPP
help
This add the CPUfreq driver support for Versatile Express
big.LITTLE platforms using SPC for power management.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9a9f5ccd13d9..f6670c4abbb0 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_SFI_CPUFREQ) += sfi-cpufreq.o
##################################################################################
# ARM SoC drivers
-obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o
-
obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o
obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o
obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
deleted file mode 100644
index 7fe52fcddcf1..000000000000
--- a/drivers/cpufreq/arm_big_little.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * ARM big.LITTLE Platforms CPUFreq support
- *
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * Copyright (C) 2013 Linaro.
- * Viresh Kumar <viresh.kumar@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/clk.h>
-#include <linux/cpu.h>
-#include <linux/cpufreq.h>
-#include <linux/cpumask.h>
-#include <linux/cpu_cooling.h>
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/of_platform.h>
-#include <linux/pm_opp.h>
-#include <linux/slab.h>
-#include <linux/topology.h>
-#include <linux/types.h>
-
-#include "arm_big_little.h"
-
-/* Currently we support only two clusters */
-#define A15_CLUSTER 0
-#define A7_CLUSTER 1
-#define MAX_CLUSTERS 2
-
-#ifdef CONFIG_BL_SWITCHER
-#include <asm/bL_switcher.h>
-static bool bL_switching_enabled;
-#define is_bL_switching_enabled() bL_switching_enabled
-#define set_switching_enabled(x) (bL_switching_enabled = (x))
-#else
-#define is_bL_switching_enabled() false
-#define set_switching_enabled(x) do { } while (0)
-#define bL_switch_request(...) do { } while (0)
-#define bL_switcher_put_enabled() do { } while (0)
-#define bL_switcher_get_enabled() do { } while (0)
-#endif
-
-#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
-#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
-
-static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
-static const struct cpufreq_arm_bL_ops *arm_bL_ops;
-static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
-static atomic_t cluster_usage[MAX_CLUSTERS + 1];
-
-static unsigned int clk_big_min; /* (Big) clock frequencies */
-static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
-
-static DEFINE_PER_CPU(unsigned int, physical_cluster);
-static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
-
-static struct mutex cluster_lock[MAX_CLUSTERS];
-
-static inline int raw_cpu_to_cluster(int cpu)
-{
- return topology_physical_package_id(cpu);
-}
-
-static inline int cpu_to_cluster(int cpu)
-{
- return is_bL_switching_enabled() ?
- MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
-}
-
-static unsigned int find_cluster_maxfreq(int cluster)
-{
- int j;
- u32 max_freq = 0, cpu_freq;
-
- for_each_online_cpu(j) {
- cpu_freq = per_cpu(cpu_last_req_freq, j);
-
- if ((cluster == per_cpu(physical_cluster, j)) &&
- (max_freq < cpu_freq))
- max_freq = cpu_freq;
- }
-
- pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
- max_freq);
-
- return max_freq;
-}
-
-static unsigned int clk_get_cpu_rate(unsigned int cpu)
-{
- u32 cur_cluster = per_cpu(physical_cluster, cpu);
- u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
-
- /* For switcher we use virtual A7 clock rates */
- if (is_bL_switching_enabled())
- rate = VIRT_FREQ(cur_cluster, rate);
-
- pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
- cur_cluster, rate);
-
- return rate;
-}
-
-static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
-{
- if (is_bL_switching_enabled()) {
- pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq,
- cpu));
-
- return per_cpu(cpu_last_req_freq, cpu);
- } else {
- return clk_get_cpu_rate(cpu);
- }
-}
-
-static unsigned int
-bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
-{
- u32 new_rate, prev_rate;
- int ret;
- bool bLs = is_bL_switching_enabled();
-
- mutex_lock(&cluster_lock[new_cluster]);
-
- if (bLs) {
- prev_rate = per_cpu(cpu_last_req_freq, cpu);
- per_cpu(cpu_last_req_freq, cpu) = rate;
- per_cpu(physical_cluster, cpu) = new_cluster;
-
- new_rate = find_cluster_maxfreq(new_cluster);
- new_rate = ACTUAL_FREQ(new_cluster, new_rate);
- } else {
- new_rate = rate;
- }
-
- pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
- __func__, cpu, old_cluster, new_cluster, new_rate);
-
- ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
- if (!ret) {
- /*
- * FIXME: clk_set_rate hasn't returned an error here however it
- * may be that clk_change_rate failed due to hardware or
- * firmware issues and wasn't able to report that due to the
- * current design of the clk core layer. To work around this
- * problem we will read back the clock rate and check it is
- * correct. This needs to be removed once clk core is fixed.
- */
- if (clk_get_rate(clk[new_cluster]) != new_rate * 1000)
- ret = -EIO;
- }
-
- if (WARN_ON(ret)) {
- pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
- new_cluster);
- if (bLs) {
- per_cpu(cpu_last_req_freq, cpu) = prev_rate;
- per_cpu(physical_cluster, cpu) = old_cluster;
- }
-
- mutex_unlock(&cluster_lock[new_cluster]);
-
- return ret;
- }
-
- mutex_unlock(&cluster_lock[new_cluster]);
-
- /* Recalc freq for old cluster when switching clusters */
- if (old_cluster != new_cluster) {
- pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
- __func__, cpu, old_cluster, new_cluster);
-
- /* Switch cluster */
- bL_switch_request(cpu, new_cluster);
-
- mutex_lock(&cluster_lock[old_cluster]);
-
- /* Set freq of old cluster if there are cpus left on it */
- new_rate = find_cluster_maxfreq(old_cluster);
- new_rate = ACTUAL_FREQ(old_cluster, new_rate);
-
- if (new_rate) {
- pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
- __func__, old_cluster, new_rate);
-
- if (clk_set_rate(clk[old_cluster], new_rate * 1000))
- pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
- __func__, ret, old_cluster);
- }
- mutex_unlock(&cluster_lock[old_cluster]);
- }
-
- return 0;
-}
-
-/* Set clock frequency */
-static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
- unsigned int index)
-{
- u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster;
- unsigned int freqs_new;
- int ret;
-
- cur_cluster = cpu_to_cluster(cpu);
- new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
-
- freqs_new = freq_table[cur_cluster][index].frequency;
-
- if (is_bL_switching_enabled()) {
- if ((actual_cluster == A15_CLUSTER) &&
- (freqs_new < clk_big_min)) {
- new_cluster = A7_CLUSTER;
- } else if ((actual_cluster == A7_CLUSTER) &&
- (freqs_new > clk_little_max)) {
- new_cluster = A15_CLUSTER;
- }
- }
-
- ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs_new);
-
- if (!ret) {
- arch_set_freq_scale(policy->related_cpus, freqs_new,
- policy->cpuinfo.max_freq);
- }
-
- return ret;
-}
-
-static inline u32 get_table_count(struct cpufreq_frequency_table *table)
-{
- int count;
-
- for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
- ;
-
- return count;
-}
-
-/* get the minimum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_min(struct cpufreq_frequency_table *table)
-{
- struct cpufreq_frequency_table *pos;
- uint32_t min_freq = ~0;
- cpufreq_for_each_entry(pos, table)
- if (pos->frequency < min_freq)
- min_freq = pos->frequency;
- return min_freq;
-}
-
-/* get the maximum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_max(struct cpufreq_frequency_table *table)
-{
- struct cpufreq_frequency_table *pos;
- uint32_t max_freq = 0;
- cpufreq_for_each_entry(pos, table)
- if (pos->frequency > max_freq)
- max_freq = pos->frequency;
- return max_freq;
-}
-
-static int merge_cluster_tables(void)
-{
- int i, j, k = 0, count = 1;
- struct cpufreq_frequency_table *table;
-
- for (i = 0; i < MAX_CLUSTERS; i++)
- count += get_table_count(freq_table[i]);
-
- table = kcalloc(count, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
- freq_table[MAX_CLUSTERS] = table;
-
- /* Add in reverse order to get freqs in increasing order */
- for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
- for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
- j++) {
- table[k].frequency = VIRT_FREQ(i,
- freq_table[i][j].frequency);
- pr_debug("%s: index: %d, freq: %d\n", __func__, k,
- table[k].frequency);
- k++;
- }
- }
-
- table[k].driver_data = k;
- table[k].frequency = CPUFREQ_TABLE_END;
-
- pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
-
- return 0;
-}
-
-static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
- const struct cpumask *cpumask)
-{
- u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
-
- if (!freq_table[cluster])
- return;
-
- clk_put(clk[cluster]);
- dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
- if (arm_bL_ops->free_opp_table)
- arm_bL_ops->free_opp_table(cpumask);
- dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
-}
-
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
- const struct cpumask *cpumask)
-{
- u32 cluster = cpu_to_cluster(cpu_dev->id);
- int i;
-
- if (atomic_dec_return(&cluster_usage[cluster]))
- return;
-
- if (cluster < MAX_CLUSTERS)
- return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
-
- for_each_present_cpu(i) {
- struct device *cdev = get_cpu_device(i);
- if (!cdev) {
- pr_err("%s: failed to get cpu%d device\n", __func__, i);
- return;
- }
-
- _put_cluster_clk_and_freq_table(cdev, cpumask);
- }
-
- /* free virtual table */
- kfree(freq_table[cluster]);
-}
-
-static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
- const struct cpumask *cpumask)
-{
- u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
- int ret;
-
- if (freq_table[cluster])
- return 0;
-
- ret = arm_bL_ops->init_opp_table(cpumask);
- if (ret) {
- dev_err(cpu_dev, "%s: init_opp_table failed, cpu: %d, err: %d\n",
- __func__, cpu_dev->id, ret);
- goto out;
- }
-
- ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
- if (ret) {
- dev_err(cpu_dev, "%s: failed to init cpufreq table, cpu: %d, err: %d\n",
- __func__, cpu_dev->id, ret);
- goto free_opp_table;
- }
-
- clk[cluster] = clk_get(cpu_dev, NULL);
- if (!IS_ERR(clk[cluster])) {
- dev_dbg(cpu_dev, "%s: clk: %p & freq table: %p, cluster: %d\n",
- __func__, clk[cluster], freq_table[cluster],
- cluster);
- return 0;
- }
-
- dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d, cluster: %d\n",
- __func__, cpu_dev->id, cluster);
- ret = PTR_ERR(clk[cluster]);
- dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
-
-free_opp_table:
- if (arm_bL_ops->free_opp_table)
- arm_bL_ops->free_opp_table(cpumask);
-out:
- dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
- cluster);
- return ret;
-}
-
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
- const struct cpumask *cpumask)
-{
- u32 cluster = cpu_to_cluster(cpu_dev->id);
- int i, ret;
-
- if (atomic_inc_return(&cluster_usage[cluster]) != 1)
- return 0;
-
- if (cluster < MAX_CLUSTERS) {
- ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
- if (ret)
- atomic_dec(&cluster_usage[cluster]);
- return ret;
- }
-
- /*
- * Get data for all clusters and fill virtual cluster with a merge of
- * both
- */
- for_each_present_cpu(i) {
- struct device *cdev = get_cpu_device(i);
- if (!cdev) {
- pr_err("%s: failed to get cpu%d device\n", __func__, i);
- return -ENODEV;
- }
-
- ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
- if (ret)
- goto put_clusters;
- }
-
- ret = merge_cluster_tables();
- if (ret)
- goto put_clusters;
-
- /* Assuming 2 cluster, set clk_big_min and clk_little_max */
- clk_big_min = get_table_min(freq_table[0]);
- clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
-
- pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
- __func__, cluster, clk_big_min, clk_little_max);
-
- return 0;
-
-put_clusters:
- for_each_present_cpu(i) {
- struct device *cdev = get_cpu_device(i);
- if (!cdev) {
- pr_err("%s: failed to get cpu%d device\n", __func__, i);
- return -ENODEV;
- }
-
- _put_cluster_clk_and_freq_table(cdev, cpumask);
- }
-
- atomic_dec(&cluster_usage[cluster]);
-
- return ret;
-}
-
-/* Per-CPU initialization */
-static int bL_cpufreq_init(struct cpufreq_policy *policy)
-{
- u32 cur_cluster = cpu_to_cluster(policy->cpu);
- struct device *cpu_dev;
- int ret;
-
- cpu_dev = get_cpu_device(policy->cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- policy->cpu);
- return -ENODEV;
- }
-
- if (cur_cluster < MAX_CLUSTERS) {
- int cpu;
-
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
-
- for_each_cpu(cpu, policy->cpus)
- per_cpu(physical_cluster, cpu) = cur_cluster;
- } else {
- /* Assumption: during init, we are always running on A15 */
- per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
- }
-
- ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
- if (ret)
- return ret;
-
- policy->freq_table = freq_table[cur_cluster];
- policy->cpuinfo.transition_latency =
- arm_bL_ops->get_transition_latency(cpu_dev);
-
- dev_pm_opp_of_register_em(policy->cpus);
-
- if (is_bL_switching_enabled())
- per_cpu(cpu_last_req_freq, policy->cpu) = clk_get_cpu_rate(policy->cpu);
-
- dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
- return 0;
-}
-
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev;
- int cur_cluster = cpu_to_cluster(policy->cpu);
-
- if (cur_cluster < MAX_CLUSTERS) {
- cpufreq_cooling_unregister(cdev[cur_cluster]);
- cdev[cur_cluster] = NULL;
- }
-
- cpu_dev = get_cpu_device(policy->cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- policy->cpu);
- return -ENODEV;
- }
-
- put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
- dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
- return 0;
-}
-
-static void bL_cpufreq_ready(struct cpufreq_policy *policy)
-{
- int cur_cluster = cpu_to_cluster(policy->cpu);
-
- /* Do not register a cpu_cooling device if we are in IKS mode */
- if (cur_cluster >= MAX_CLUSTERS)
- return;
-
- cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
-}
-
-static struct cpufreq_driver bL_cpufreq_driver = {
- .name = "arm-big-little",
- .flags = CPUFREQ_STICKY |
- CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
- CPUFREQ_NEED_INITIAL_FREQ_CHECK,
- .verify = cpufreq_generic_frequency_table_verify,
- .target_index = bL_cpufreq_set_target,
- .get = bL_cpufreq_get_rate,
- .init = bL_cpufreq_init,
- .exit = bL_cpufreq_exit,
- .ready = bL_cpufreq_ready,
- .attr = cpufreq_generic_attr,
-};
-
-#ifdef CONFIG_BL_SWITCHER
-static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
- unsigned long action, void *_arg)
-{
- pr_debug("%s: action: %ld\n", __func__, action);
-
- switch (action) {
- case BL_NOTIFY_PRE_ENABLE:
- case BL_NOTIFY_PRE_DISABLE:
- cpufreq_unregister_driver(&bL_cpufreq_driver);
- break;
-
- case BL_NOTIFY_POST_ENABLE:
- set_switching_enabled(true);
- cpufreq_register_driver(&bL_cpufreq_driver);
- break;
-
- case BL_NOTIFY_POST_DISABLE:
- set_switching_enabled(false);
- cpufreq_register_driver(&bL_cpufreq_driver);
- break;
-
- default:
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block bL_switcher_notifier = {
- .notifier_call = bL_cpufreq_switcher_notifier,
-};
-
-static int __bLs_register_notifier(void)
-{
- return bL_switcher_register_notifier(&bL_switcher_notifier);
-}
-
-static int __bLs_unregister_notifier(void)
-{
- return bL_switcher_unregister_notifier(&bL_switcher_notifier);
-}
-#else
-static int __bLs_register_notifier(void) { return 0; }
-static int __bLs_unregister_notifier(void) { return 0; }
-#endif
-
-int bL_cpufreq_register(const struct cpufreq_arm_bL_ops *ops)
-{
- int ret, i;
-
- if (arm_bL_ops) {
- pr_debug("%s: Already registered: %s, exiting\n", __func__,
- arm_bL_ops->name);
- return -EBUSY;
- }
-
- if (!ops || !strlen(ops->name) || !ops->init_opp_table ||
- !ops->get_transition_latency) {
- pr_err("%s: Invalid arm_bL_ops, exiting\n", __func__);
- return -ENODEV;
- }
-
- arm_bL_ops = ops;
-
- set_switching_enabled(bL_switcher_get_enabled());
-
- for (i = 0; i < MAX_CLUSTERS; i++)
- mutex_init(&cluster_lock[i]);
-
- ret = cpufreq_register_driver(&bL_cpufreq_driver);
- if (ret) {
- pr_info("%s: Failed registering platform driver: %s, err: %d\n",
- __func__, ops->name, ret);
- arm_bL_ops = NULL;
- } else {
- ret = __bLs_register_notifier();
- if (ret) {
- cpufreq_unregister_driver(&bL_cpufreq_driver);
- arm_bL_ops = NULL;
- } else {
- pr_info("%s: Registered platform driver: %s\n",
- __func__, ops->name);
- }
- }
-
- bL_switcher_put_enabled();
- return ret;
-}
-EXPORT_SYMBOL_GPL(bL_cpufreq_register);
-
-void bL_cpufreq_unregister(const struct cpufreq_arm_bL_ops *ops)
-{
- if (arm_bL_ops != ops) {
- pr_err("%s: Registered with: %s, can't unregister, exiting\n",
- __func__, arm_bL_ops->name);
- return;
- }
-
- bL_switcher_get_enabled();
- __bLs_unregister_notifier();
- cpufreq_unregister_driver(&bL_cpufreq_driver);
- bL_switcher_put_enabled();
- pr_info("%s: Un-registered platform driver: %s\n", __func__,
- arm_bL_ops->name);
- arm_bL_ops = NULL;
-}
-EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
-
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
-MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
deleted file mode 100644
index 88a176e466c8..000000000000
--- a/drivers/cpufreq/arm_big_little.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * ARM big.LITTLE platform's CPUFreq header file
- *
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * Copyright (C) 2013 Linaro.
- * Viresh Kumar <viresh.kumar@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-#ifndef CPUFREQ_ARM_BIG_LITTLE_H
-#define CPUFREQ_ARM_BIG_LITTLE_H
-
-#include <linux/cpufreq.h>
-#include <linux/device.h>
-#include <linux/types.h>
-
-struct cpufreq_arm_bL_ops {
- char name[CPUFREQ_NAME_LEN];
-
- /*
- * This must set opp table for cpu_dev in a similar way as done by
- * dev_pm_opp_of_add_table().
- */
- int (*init_opp_table)(const struct cpumask *cpumask);
-
- /* Optional */
- int (*get_transition_latency)(struct device *cpu_dev);
- void (*free_opp_table)(const struct cpumask *cpumask);
-};
-
-int bL_cpufreq_register(const struct cpufreq_arm_bL_ops *ops);
-void bL_cpufreq_unregister(const struct cpufreq_arm_bL_ops *ops);
-
-#endif /* CPUFREQ_ARM_BIG_LITTLE_H */
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index bca8d1f47fd2..54bc76743b1f 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -86,7 +86,6 @@ static const struct of_device_id whitelist[] __initconst = {
{ .compatible = "st-ericsson,u9540", },
{ .compatible = "ti,omap2", },
- { .compatible = "ti,omap3", },
{ .compatible = "ti,omap4", },
{ .compatible = "ti,omap5", },
@@ -137,6 +136,7 @@ static const struct of_device_id blacklist[] __initconst = {
{ .compatible = "ti,am33xx", },
{ .compatible = "ti,am43", },
{ .compatible = "ti,dra7", },
+ { .compatible = "ti,omap3", },
{ }
};
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 48a224a6b178..7fc1a686f2f6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -933,6 +933,9 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
struct freq_attr *fattr = to_attr(attr);
ssize_t ret;
+ if (!fattr->show)
+ return -EIO;
+
down_read(&policy->rwsem);
ret = fattr->show(policy, buf);
up_read(&policy->rwsem);
@@ -947,6 +950,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
+ if (!fattr->store)
+ return -EIO;
+
/*
* cpus_read_trylock() is used here to work around a circular lock
* dependency problem with respect to the cpufreq_register_driver().
@@ -2385,7 +2391,10 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
- /* verify the cpu speed can be set within this limit */
+ /*
+ * Verify that the CPU speed can be set within these limits and make sure
+ * that min <= max.
+ */
ret = cpufreq_driver->verify(new_policy);
if (ret)
return ret;
@@ -2628,6 +2637,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
if (cpufreq_disabled())
return -ENODEV;
+ /*
+	 * The cpufreq core depends heavily on the availability of device
+	 * structures, so make sure they are available before proceeding further.
+ */
+ if (!get_cpu_device(0))
+ return -EPROBE_DEFER;
+
if (!driver_data || !driver_data->verify || !driver_data->init ||
!(driver_data->setpolicy || driver_data->target_index ||
driver_data->target) ||
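Two defensive checks are added in cpufreq.c: show()/store() now return -EIO for attributes that lack a handler, and cpufreq_register_driver() defers probing until CPU device 0 exists. A hedged illustration of the deferral idea follows; demo_register() is made up, only get_cpu_device() is the real API.

#include <linux/cpu.h>
#include <linux/errno.h>

static int demo_register(void)
{
	/*
	 * CPU devices are registered early; if cpu0 is not there yet,
	 * ask the driver core to retry this probe later.
	 */
	if (!get_cpu_device(0))
		return -EPROBE_DEFER;

	/* ... normal driver registration would continue here ... */
	return 0;
}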
diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c
index 35db14cf3102..85a6efd6b68f 100644
--- a/drivers/cpufreq/imx-cpufreq-dt.c
+++ b/drivers/cpufreq/imx-cpufreq-dt.c
@@ -44,19 +44,19 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
mkt_segment = (cell_value & OCOTP_CFG3_MKT_SEGMENT_MASK) >> OCOTP_CFG3_MKT_SEGMENT_SHIFT;
/*
- * Early samples without fuses written report "0 0" which means
- * consumer segment and minimum speed grading.
- *
- * According to datasheet minimum speed grading is not supported for
- * consumer parts so clamp to 1 to avoid warning for "no OPPs"
+ * Early samples without fuses written report "0 0" which may NOT
+ * match any OPP defined in DT. So clamp to minimum OPP defined in
+ * DT to avoid warning for "no OPPs".
*
* Applies to i.MX8M series SoCs.
*/
- if (mkt_segment == 0 && speed_grade == 0 && (
- of_machine_is_compatible("fsl,imx8mm") ||
- of_machine_is_compatible("fsl,imx8mn") ||
- of_machine_is_compatible("fsl,imx8mq")))
- speed_grade = 1;
+ if (mkt_segment == 0 && speed_grade == 0) {
+ if (of_machine_is_compatible("fsl,imx8mm") ||
+ of_machine_is_compatible("fsl,imx8mq"))
+ speed_grade = 1;
+ if (of_machine_is_compatible("fsl,imx8mn"))
+ speed_grade = 0xb;
+ }
supported_hw[0] = BIT(speed_grade);
supported_hw[1] = BIT(mkt_segment);
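For context, the speed grade and market segment read from the fuses end up as bit masks handed to the OPP core, which compares them against each OPP entry's opp-supported-hw values to decide which OPPs stay enabled. A rough sketch with made-up fuse values and a hypothetical demo_mark_supported_hw() helper:

#include <linux/bits.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/pm_opp.h>

static int demo_mark_supported_hw(struct device *cpu_dev)
{
	u32 speed_grade = 1;	/* would come from the OCOTP fuse word */
	u32 mkt_segment = 0;	/* consumer segment, also from the fuse */
	u32 supported_hw[2];
	struct opp_table *table;

	supported_hw[0] = BIT(speed_grade);
	supported_hw[1] = BIT(mkt_segment);

	table = dev_pm_opp_set_supported_hw(cpu_dev, supported_hw, 2);
	return PTR_ERR_OR_ZERO(table);
}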
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8ab31702cf6a..d2fa3e9ccd97 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2662,21 +2662,21 @@ enum {
/* Hardware vendor-specific info that has its own power management modes */
static struct acpi_platform_list plat_info[] __initdata = {
- {"HP ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, 0, PSS},
- {"ORACLE", "X4-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4-2L ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4-2B ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X3-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X3-2L ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X3-2B ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "X6-2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
- {"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
+ {"HP ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, NULL, PSS},
+ {"ORACLE", "X4-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4-2L ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4-2B ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X3-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X3-2L ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X3-2B ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "X6-2 ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
+ {"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, NULL, PPC},
{ } /* End */
};
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 6061850e59c9..56f4bc0d209e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -1041,9 +1041,14 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
static int init_chip_info(void)
{
- unsigned int chip[256];
+ unsigned int *chip;
unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX;
+ int ret = 0;
+
+ chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
for_each_possible_cpu(cpu) {
unsigned int id = cpu_to_chip_id(cpu);
@@ -1055,8 +1060,10 @@ static int init_chip_info(void)
}
chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
- if (!chips)
- return -ENOMEM;
+ if (!chips) {
+ ret = -ENOMEM;
+ goto free_and_return;
+ }
for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i];
@@ -1066,7 +1073,9 @@ static int init_chip_info(void)
per_cpu(chip_info, cpu) = &chips[i];
}
- return 0;
+free_and_return:
+ kfree(chip);
+ return ret;
}
static inline void clean_chip_info(void)
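The point of the powernv hunk is to stop keeping a 256-entry scratch array on the kernel stack; it is now allocated with kcalloc() and freed on every exit path. The shape of that pattern, reduced to a hypothetical demo_init():

#include <linux/errno.h>
#include <linux/slab.h>

static int demo_init(unsigned int n)
{
	unsigned int *scratch;
	int ret = 0;

	scratch = kcalloc(n, sizeof(*scratch), GFP_KERNEL);
	if (!scratch)
		return -ENOMEM;

	/* ... fill and consume scratch[] here; set ret on failure ... */

	kfree(scratch);
	return ret;
}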
diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
index af0c00dabb22..c6bdfc308e99 100644
--- a/drivers/cpufreq/s3c64xx-cpufreq.c
+++ b/drivers/cpufreq/s3c64xx-cpufreq.c
@@ -19,7 +19,6 @@
static struct regulator *vddarm;
static unsigned long regulator_latency;
-#ifdef CONFIG_CPU_S3C6410
struct s3c64xx_dvfs {
unsigned int vddarm_min;
unsigned int vddarm_max;
@@ -48,7 +47,6 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
{ 0, 4, 800000 },
{ 0, 0, CPUFREQ_TABLE_END },
};
-#endif
static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int index)
@@ -149,11 +147,6 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
if (policy->cpu != 0)
return -EINVAL;
- if (s3c64xx_freq_table == NULL) {
- pr_err("No frequency information for this CPU\n");
- return -ENODEV;
- }
-
policy->clk = clk_get(NULL, "armclk");
if (IS_ERR(policy->clk)) {
pr_err("Unable to obtain ARMCLK: %ld\n",
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 2b51e0718c9f..20d1f85d5f5a 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -1,8 +1,6 @@
/*
* System Control and Power Interface (SCPI) based CPUFreq Interface driver
*
- * It provides necessary ops to arm_big_little cpufreq driver.
- *
* Copyright (C) 2015 ARM Ltd.
* Sudeep Holla <sudeep.holla@arm.com>
*
diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index eca32e443716..9907a165135b 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -25,7 +25,7 @@
static struct platform_device *cpufreq_dt_pdev, *sun50i_cpufreq_pdev;
/**
- * sun50i_cpufreq_get_efuse() - Parse and return efuse value present on SoC
+ * sun50i_cpufreq_get_efuse() - Determine speed grade from efuse value
* @versions: Set to the value parsed from efuse
*
* Returns 0 if success.
@@ -69,21 +69,16 @@ static int sun50i_cpufreq_get_efuse(u32 *versions)
return PTR_ERR(speedbin);
efuse_value = (*speedbin >> NVMEM_SHIFT) & NVMEM_MASK;
- switch (efuse_value) {
- case 0b0001:
- *versions = 1;
- break;
- case 0b0011:
- *versions = 2;
- break;
- default:
- /*
- * For other situations, we treat it as bin0.
- * This vf table can be run for any good cpu.
- */
+
+ /*
+ * We treat unexpected efuse values as if the SoC was from
+ * the slowest bin. Expected efuse values are 1-3, slowest
+ * to fastest.
+ */
+ if (efuse_value >= 1 && efuse_value <= 3)
+ *versions = efuse_value - 1;
+ else
*versions = 0;
- break;
- }
kfree(speedbin);
return 0;
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index aeaa883a8c9d..557cb513bf7f 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -31,11 +31,17 @@
#define DRA7_EFUSE_OD_MPU_OPP BIT(1)
#define DRA7_EFUSE_HIGH_MPU_OPP BIT(2)
+#define OMAP3_CONTROL_DEVICE_STATUS 0x4800244C
+#define OMAP3_CONTROL_IDCODE 0x4830A204
+#define OMAP34xx_ProdID_SKUID 0x4830A20C
+#define OMAP3_SYSCON_BASE (0x48000000 + 0x2000 + 0x270)
+
#define VERSION_COUNT 2
struct ti_cpufreq_data;
struct ti_cpufreq_soc_data {
+ const char * const *reg_names;
unsigned long (*efuse_xlate)(struct ti_cpufreq_data *opp_data,
unsigned long efuse);
unsigned long efuse_fallback;
@@ -85,6 +91,13 @@ static unsigned long dra7_efuse_xlate(struct ti_cpufreq_data *opp_data,
return calculated_efuse;
}
+static unsigned long omap3_efuse_xlate(struct ti_cpufreq_data *opp_data,
+ unsigned long efuse)
+{
+ /* OPP enable bit ("Speed Binned") */
+ return BIT(efuse);
+}
+
static struct ti_cpufreq_soc_data am3x_soc_data = {
.efuse_xlate = amx3_efuse_xlate,
.efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ,
@@ -112,6 +125,74 @@ static struct ti_cpufreq_soc_data dra7_soc_data = {
.multi_regulator = true,
};
+/*
+ * OMAP35x TRM (SPRUF98K):
+ * CONTROL_IDCODE (0x4830 A204) describes Silicon revisions.
+ * The Control OMAP Status Register 15:0 (address 0x4800 244C)
+ * distinguishes between omap3503, omap3515, omap3525 and omap3530
+ * and indicates which features are present.
+ * There are encodings for versions limited to 400/266MHz,
+ * but we ignore them.
+ * It is not clear whether this also holds for omap34xx.
+ * Some eFuse values, e.g. CONTROL_FUSE_OPP1_VDD1,
+ * are stored in the SYSCON register range.
+ * Register 0x4830A20C [ProdID.SKUID] [0:3]
+ * 0x0 for normal 600/430MHz device.
+ * 0x8 for 720/520MHz device.
+ * Not clear what omap34xx value is.
+ */
+
+static struct ti_cpufreq_soc_data omap34xx_soc_data = {
+ .efuse_xlate = omap3_efuse_xlate,
+ .efuse_offset = OMAP34xx_ProdID_SKUID - OMAP3_SYSCON_BASE,
+ .efuse_shift = 3,
+ .efuse_mask = BIT(3),
+ .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+ .multi_regulator = false,
+};
+
+/*
+ * AM/DM37x TRM (SPRUGN4M)
+ * CONTROL_IDCODE (0x4830 A204) describes Silicon revisions.
+ * The Control Device Status Register 15:0 (address 0x4800 244C)
+ * distinguishes between am3703, am3715, dm3725 and dm3730
+ * and indicates which features are present.
+ * Speed Binned = Bit 9
+ * 0 800/600 MHz
+ * 1 1000/800 MHz
+ * Some eFuse values, e.g. CONTROL_FUSE_OPP 1G_VDD1,
+ * are stored in the SYSCON register range.
+ * There is no usable 0x4830A20C [ProdID.SKUID] register here; it
+ * exists but seems to always read as 0.
+ */
+
+static const char * const omap3_reg_names[] = {"cpu0", "vbb"};
+
+static struct ti_cpufreq_soc_data omap36xx_soc_data = {
+ .reg_names = omap3_reg_names,
+ .efuse_xlate = omap3_efuse_xlate,
+ .efuse_offset = OMAP3_CONTROL_DEVICE_STATUS - OMAP3_SYSCON_BASE,
+ .efuse_shift = 9,
+ .efuse_mask = BIT(9),
+ .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+ .multi_regulator = true,
+};
+
+/*
+ * AM3517 is quite similar to AM/DM37x except that it has no
+ * high speed grade eFuse and no ABB LDO.
+ */
+
+static struct ti_cpufreq_soc_data am3517_soc_data = {
+ .efuse_xlate = omap3_efuse_xlate,
+ .efuse_offset = OMAP3_CONTROL_DEVICE_STATUS - OMAP3_SYSCON_BASE,
+ .efuse_shift = 0,
+ .efuse_mask = 0,
+ .rev_offset = OMAP3_CONTROL_IDCODE - OMAP3_SYSCON_BASE,
+ .multi_regulator = false,
+};
+
+
/**
* ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC
* @opp_data: pointer to ti_cpufreq_data context
@@ -128,7 +209,17 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data,
ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset,
&efuse);
- if (ret) {
+ if (ret == -EIO) {
+ /* not a syscon register! */
+ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE +
+ opp_data->soc_data->efuse_offset, 4);
+
+ if (!regs)
+ return -ENOMEM;
+ efuse = readl(regs);
+ iounmap(regs);
+ }
+ else if (ret) {
dev_err(dev,
"Failed to read the efuse value from syscon: %d\n",
ret);
@@ -159,7 +250,17 @@ static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data,
ret = regmap_read(opp_data->syscon, opp_data->soc_data->rev_offset,
&revision);
- if (ret) {
+ if (ret == -EIO) {
+ /* not a syscon register! */
+ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE +
+ opp_data->soc_data->rev_offset, 4);
+
+ if (!regs)
+ return -ENOMEM;
+ revision = readl(regs);
+ iounmap(regs);
+ }
+ else if (ret) {
dev_err(dev,
"Failed to read the revision number from syscon: %d\n",
ret);
@@ -189,8 +290,14 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data)
static const struct of_device_id ti_cpufreq_of_match[] = {
{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
+ { .compatible = "ti,am3517", .data = &am3517_soc_data, },
{ .compatible = "ti,am43", .data = &am4x_soc_data, },
{ .compatible = "ti,dra7", .data = &dra7_soc_data },
+ { .compatible = "ti,omap34xx", .data = &omap34xx_soc_data, },
+ { .compatible = "ti,omap36xx", .data = &omap36xx_soc_data, },
+ /* legacy */
+ { .compatible = "ti,omap3430", .data = &omap34xx_soc_data, },
+ { .compatible = "ti,omap3630", .data = &omap36xx_soc_data, },
{},
};
@@ -212,7 +319,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
const struct of_device_id *match;
struct opp_table *ti_opp_table;
struct ti_cpufreq_data *opp_data;
- const char * const reg_names[] = {"vdd", "vbb"};
+ const char * const default_reg_names[] = {"vdd", "vbb"};
int ret;
match = dev_get_platdata(&pdev->dev);
@@ -268,9 +375,13 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
opp_data->opp_table = ti_opp_table;
if (opp_data->soc_data->multi_regulator) {
+ const char * const *reg_names = default_reg_names;
+
+ if (opp_data->soc_data->reg_names)
+ reg_names = opp_data->soc_data->reg_names;
ti_opp_table = dev_pm_opp_set_regulators(opp_data->cpu_dev,
reg_names,
- ARRAY_SIZE(reg_names));
+ ARRAY_SIZE(default_reg_names));
if (IS_ERR(ti_opp_table)) {
dev_pm_opp_put_supported_hw(opp_data->opp_table);
ret = PTR_ERR(ti_opp_table);
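Two things are worth noting in the ti-cpufreq changes: the OMAP3 revision and eFuse registers are not part of the syscon regmap, so a regmap_read() that fails with -EIO falls back to a one-off ioremap()/readl(), and per-SoC data may now override the regulator names ("cpu0"/"vbb" instead of the default "vdd"/"vbb"). A minimal sketch of the fallback read; demo_read_reg() is illustrative, not from the patch.

#include <linux/errno.h>
#include <linux/io.h>

static int demo_read_reg(unsigned long phys, u32 *val)
{
	void __iomem *regs = ioremap(phys, 4);

	if (!regs)
		return -ENOMEM;
	*val = readl(regs);
	iounmap(regs);
	return 0;
}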
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 53237289e606..506e3f2bf53a 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -1,61 +1,592 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Versatile Express SPC CPUFreq Interface driver
*
- * It provides necessary ops to arm_big_little cpufreq driver.
+ * Copyright (C) 2013 - 2019 ARM Ltd.
+ * Sudeep Holla <sudeep.holla@arm.com>
*
- * Copyright (C) 2013 ARM Ltd.
- * Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Copyright (C) 2013 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/clk.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
+#include <linux/device.h>
#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
#include <linux/types.h>
-#include "arm_big_little.h"
+/* Currently we support only two clusters */
+#define A15_CLUSTER 0
+#define A7_CLUSTER 1
+#define MAX_CLUSTERS 2
+
+#ifdef CONFIG_BL_SWITCHER
+#include <asm/bL_switcher.h>
+static bool bL_switching_enabled;
+#define is_bL_switching_enabled() bL_switching_enabled
+#define set_switching_enabled(x) (bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled() false
+#define set_switching_enabled(x) do { } while (0)
+#define bL_switch_request(...) do { } while (0)
+#define bL_switcher_put_enabled() do { } while (0)
+#define bL_switcher_get_enabled() do { } while (0)
+#endif
+
+#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
+
+static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
+static struct clk *clk[MAX_CLUSTERS];
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1];
+
+static unsigned int clk_big_min; /* (Big) clock frequencies */
+static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static inline int raw_cpu_to_cluster(int cpu)
+{
+ return topology_physical_package_id(cpu);
+}
+
+static inline int cpu_to_cluster(int cpu)
+{
+ return is_bL_switching_enabled() ?
+ MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
+}
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+ int j;
+ u32 max_freq = 0, cpu_freq;
+
+ for_each_online_cpu(j) {
+ cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+ if (cluster == per_cpu(physical_cluster, j) &&
+ max_freq < cpu_freq)
+ max_freq = cpu_freq;
+ }
+
+ return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
+{
+ u32 cur_cluster = per_cpu(physical_cluster, cpu);
+ u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+ /* For switcher we use virtual A7 clock rates */
+ if (is_bL_switching_enabled())
+ rate = VIRT_FREQ(cur_cluster, rate);
+
+ return rate;
+}
+
+static unsigned int ve_spc_cpufreq_get_rate(unsigned int cpu)
+{
+ if (is_bL_switching_enabled())
+ return per_cpu(cpu_last_req_freq, cpu);
+ else
+ return clk_get_cpu_rate(cpu);
+}
+
+static unsigned int
+ve_spc_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+ u32 new_rate, prev_rate;
+ int ret;
+ bool bLs = is_bL_switching_enabled();
+
+ mutex_lock(&cluster_lock[new_cluster]);
+
+ if (bLs) {
+ prev_rate = per_cpu(cpu_last_req_freq, cpu);
+ per_cpu(cpu_last_req_freq, cpu) = rate;
+ per_cpu(physical_cluster, cpu) = new_cluster;
+
+ new_rate = find_cluster_maxfreq(new_cluster);
+ new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+ } else {
+ new_rate = rate;
+ }
+
+ ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (!ret) {
+ /*
+ * FIXME: clk_set_rate hasn't returned an error here however it
+ * may be that clk_change_rate failed due to hardware or
+ * firmware issues and wasn't able to report that due to the
+ * current design of the clk core layer. To work around this
+ * problem we will read back the clock rate and check it is
+ * correct. This needs to be removed once clk core is fixed.
+ */
+ if (clk_get_rate(clk[new_cluster]) != new_rate * 1000)
+ ret = -EIO;
+ }
+
+ if (WARN_ON(ret)) {
+ if (bLs) {
+ per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+ per_cpu(physical_cluster, cpu) = old_cluster;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ return ret;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ /* Recalc freq for old cluster when switching clusters */
+ if (old_cluster != new_cluster) {
+ /* Switch cluster */
+ bL_switch_request(cpu, new_cluster);
+
+ mutex_lock(&cluster_lock[old_cluster]);
+
+ /* Set freq of old cluster if there are cpus left on it */
+ new_rate = find_cluster_maxfreq(old_cluster);
+ new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+ if (new_rate &&
+ clk_set_rate(clk[old_cluster], new_rate * 1000)) {
+ pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+ __func__, ret, old_cluster);
+ }
+ mutex_unlock(&cluster_lock[old_cluster]);
+ }
+
+ return 0;
+}
+
+/* Set clock frequency */
+static int ve_spc_cpufreq_set_target(struct cpufreq_policy *policy,
+ unsigned int index)
+{
+ u32 cpu = policy->cpu, cur_cluster, new_cluster, actual_cluster;
+ unsigned int freqs_new;
+ int ret;
+
+ cur_cluster = cpu_to_cluster(cpu);
+ new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
+
+ freqs_new = freq_table[cur_cluster][index].frequency;
+
+ if (is_bL_switching_enabled()) {
+ if (actual_cluster == A15_CLUSTER && freqs_new < clk_big_min)
+ new_cluster = A7_CLUSTER;
+ else if (actual_cluster == A7_CLUSTER &&
+ freqs_new > clk_little_max)
+ new_cluster = A15_CLUSTER;
+ }
+
+ ret = ve_spc_cpufreq_set_rate(cpu, actual_cluster, new_cluster,
+ freqs_new);
+
+ if (!ret) {
+ arch_set_freq_scale(policy->related_cpus, freqs_new,
+ policy->cpuinfo.max_freq);
+ }
+
+ return ret;
+}
+
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+ int count;
+
+ for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
+ ;
+
+ return count;
+}
+
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+ struct cpufreq_frequency_table *pos;
+ u32 min_freq = ~0;
+
+ cpufreq_for_each_entry(pos, table)
+ if (pos->frequency < min_freq)
+ min_freq = pos->frequency;
+ return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+ struct cpufreq_frequency_table *pos;
+ u32 max_freq = 0;
+
+ cpufreq_for_each_entry(pos, table)
+ if (pos->frequency > max_freq)
+ max_freq = pos->frequency;
+ return max_freq;
+}
+
+static bool search_frequency(struct cpufreq_frequency_table *table, int size,
+ unsigned int freq)
+{
+ int count;
+
+ for (count = 0; count < size; count++) {
+ if (table[count].frequency == freq)
+ return true;
+ }
+
+ return false;
+}
+
+static int merge_cluster_tables(void)
+{
+ int i, j, k = 0, count = 1;
+ struct cpufreq_frequency_table *table;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ count += get_table_count(freq_table[i]);
+
+ table = kcalloc(count, sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ freq_table[MAX_CLUSTERS] = table;
+
+ /* Add in reverse order to get freqs in increasing order */
+ for (i = MAX_CLUSTERS - 1; i >= 0; i--, count = k) {
+ for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+ j++) {
+ if (i == A15_CLUSTER &&
+ search_frequency(table, count, freq_table[i][j].frequency))
+ continue; /* skip duplicates */
+ table[k++].frequency =
+ VIRT_FREQ(i, freq_table[i][j].frequency);
+ }
+ }
+
+ table[k].driver_data = k;
+ table[k].frequency = CPUFREQ_TABLE_END;
+
+ return 0;
+}
-static int ve_spc_init_opp_table(const struct cpumask *cpumask)
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
- struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
+ u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
+
+ if (!freq_table[cluster])
+ return;
+
+ clk_put(clk[cluster]);
+ dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+}
+
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i;
+
+ if (atomic_dec_return(&cluster_usage[cluster]))
+ return;
+
+ if (cluster < MAX_CLUSTERS)
+ return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
+
+ for_each_present_cpu(i) {
+ struct device *cdev = get_cpu_device(i);
+
+ if (!cdev)
+ return;
+
+ _put_cluster_clk_and_freq_table(cdev, cpumask);
+ }
+
+ /* free virtual table */
+ kfree(freq_table[cluster]);
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
+{
+ u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
+ int ret;
+
+ if (freq_table[cluster])
+ return 0;
+
/*
* platform specific SPC code must initialise the opp table
* so just check if the OPP count is non-zero
*/
- return dev_pm_opp_get_opp_count(cpu_dev) <= 0;
+ ret = dev_pm_opp_get_opp_count(cpu_dev) <= 0;
+ if (ret)
+ goto out;
+
+ ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table[cluster]);
+ if (ret)
+ goto out;
+
+ clk[cluster] = clk_get(cpu_dev, NULL);
+ if (!IS_ERR(clk[cluster]))
+ return 0;
+
+ dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d, cluster: %d\n",
+ __func__, cpu_dev->id, cluster);
+ ret = PTR_ERR(clk[cluster]);
+ dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+
+out:
+ dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
+ cluster);
+ return ret;
}
-static int ve_spc_get_transition_latency(struct device *cpu_dev)
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
- return 1000000; /* 1 ms */
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i, ret;
+
+ if (atomic_inc_return(&cluster_usage[cluster]) != 1)
+ return 0;
+
+ if (cluster < MAX_CLUSTERS) {
+ ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
+ if (ret)
+ atomic_dec(&cluster_usage[cluster]);
+ return ret;
+ }
+
+ /*
+ * Get data for all clusters and fill virtual cluster with a merge of
+ * both
+ */
+ for_each_present_cpu(i) {
+ struct device *cdev = get_cpu_device(i);
+
+ if (!cdev)
+ return -ENODEV;
+
+ ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
+ if (ret)
+ goto put_clusters;
+ }
+
+ ret = merge_cluster_tables();
+ if (ret)
+ goto put_clusters;
+
+ /* Assuming 2 cluster, set clk_big_min and clk_little_max */
+ clk_big_min = get_table_min(freq_table[A15_CLUSTER]);
+ clk_little_max = VIRT_FREQ(A7_CLUSTER,
+ get_table_max(freq_table[A7_CLUSTER]));
+
+ return 0;
+
+put_clusters:
+ for_each_present_cpu(i) {
+ struct device *cdev = get_cpu_device(i);
+
+ if (!cdev)
+ return -ENODEV;
+
+ _put_cluster_clk_and_freq_table(cdev, cpumask);
+ }
+
+ atomic_dec(&cluster_usage[cluster]);
+
+ return ret;
}
-static const struct cpufreq_arm_bL_ops ve_spc_cpufreq_ops = {
- .name = "vexpress-spc",
- .get_transition_latency = ve_spc_get_transition_latency,
- .init_opp_table = ve_spc_init_opp_table,
+/* Per-CPU initialization */
+static int ve_spc_cpufreq_init(struct cpufreq_policy *policy)
+{
+ u32 cur_cluster = cpu_to_cluster(policy->cpu);
+ struct device *cpu_dev;
+ int ret;
+
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__,
+ policy->cpu);
+ return -ENODEV;
+ }
+
+ if (cur_cluster < MAX_CLUSTERS) {
+ int cpu;
+
+ cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+ for_each_cpu(cpu, policy->cpus)
+ per_cpu(physical_cluster, cpu) = cur_cluster;
+ } else {
+ /* Assumption: during init, we are always running on A15 */
+ per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+ }
+
+ ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+ if (ret)
+ return ret;
+
+ policy->freq_table = freq_table[cur_cluster];
+ policy->cpuinfo.transition_latency = 1000000; /* 1 ms */
+
+ dev_pm_opp_of_register_em(policy->cpus);
+
+ if (is_bL_switching_enabled())
+ per_cpu(cpu_last_req_freq, policy->cpu) =
+ clk_get_cpu_rate(policy->cpu);
+
+ dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
+ return 0;
+}
+
+static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy)
+{
+ struct device *cpu_dev;
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpufreq_cooling_unregister(cdev[cur_cluster]);
+ cdev[cur_cluster] = NULL;
+ }
+
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__,
+ policy->cpu);
+ return -ENODEV;
+ }
+
+ put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
+ return 0;
+}
+
+static void ve_spc_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ int cur_cluster = cpu_to_cluster(policy->cpu);
+
+ /* Do not register a cpu_cooling device if we are in IKS mode */
+ if (cur_cluster >= MAX_CLUSTERS)
+ return;
+
+ cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
+}
+
+static struct cpufreq_driver ve_spc_cpufreq_driver = {
+ .name = "vexpress-spc",
+ .flags = CPUFREQ_STICKY |
+ CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
+ CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+ .verify = cpufreq_generic_frequency_table_verify,
+ .target_index = ve_spc_cpufreq_set_target,
+ .get = ve_spc_cpufreq_get_rate,
+ .init = ve_spc_cpufreq_init,
+ .exit = ve_spc_cpufreq_exit,
+ .ready = ve_spc_cpufreq_ready,
+ .attr = cpufreq_generic_attr,
};
+#ifdef CONFIG_BL_SWITCHER
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ pr_debug("%s: action: %ld\n", __func__, action);
+
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ set_switching_enabled(true);
+ cpufreq_register_driver(&ve_spc_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_DISABLE:
+ set_switching_enabled(false);
+ cpufreq_register_driver(&ve_spc_cpufreq_driver);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+ .notifier_call = bL_cpufreq_switcher_notifier,
+};
+
+static int __bLs_register_notifier(void)
+{
+ return bL_switcher_register_notifier(&bL_switcher_notifier);
+}
+
+static int __bLs_unregister_notifier(void)
+{
+ return bL_switcher_unregister_notifier(&bL_switcher_notifier);
+}
+#else
+static int __bLs_register_notifier(void) { return 0; }
+static int __bLs_unregister_notifier(void) { return 0; }
+#endif
+
static int ve_spc_cpufreq_probe(struct platform_device *pdev)
{
- return bL_cpufreq_register(&ve_spc_cpufreq_ops);
+ int ret, i;
+
+ set_switching_enabled(bL_switcher_get_enabled());
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ mutex_init(&cluster_lock[i]);
+
+ ret = cpufreq_register_driver(&ve_spc_cpufreq_driver);
+ if (ret) {
+ pr_info("%s: Failed registering platform driver: %s, err: %d\n",
+ __func__, ve_spc_cpufreq_driver.name, ret);
+ } else {
+ ret = __bLs_register_notifier();
+ if (ret)
+ cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+ else
+ pr_info("%s: Registered platform driver: %s\n",
+ __func__, ve_spc_cpufreq_driver.name);
+ }
+
+ bL_switcher_put_enabled();
+ return ret;
}
static int ve_spc_cpufreq_remove(struct platform_device *pdev)
{
- bL_cpufreq_unregister(&ve_spc_cpufreq_ops);
+ bL_switcher_get_enabled();
+ __bLs_unregister_notifier();
+ cpufreq_unregister_driver(&ve_spc_cpufreq_driver);
+ bL_switcher_put_enabled();
+ pr_info("%s: Un-registered platform driver: %s\n", __func__,
+ ve_spc_cpufreq_driver.name);
return 0;
}
@@ -68,4 +599,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = {
};
module_platform_driver(ve_spc_cpufreq_platdrv);
-MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("Vexpress SPC ARM big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 84b1ebe212b3..1b299e801f74 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -56,13 +56,10 @@ static u64 get_snooze_timeout(struct cpuidle_device *dev,
return default_snooze_timeout;
for (i = index + 1; i < drv->state_count; i++) {
- struct cpuidle_state *s = &drv->states[i];
- struct cpuidle_state_usage *su = &dev->states_usage[i];
-
- if (s->disabled || su->disable)
+ if (dev->states_usage[i].disable)
continue;
- return s->target_residency * tb_ticks_per_usec;
+ return drv->states[i].target_residency * tb_ticks_per_usec;
}
return default_snooze_timeout;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0895b988fa92..569dbac443bd 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -75,44 +75,45 @@ int cpuidle_play_dead(void)
static int find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
- unsigned int max_latency,
+ u64 max_latency_ns,
unsigned int forbidden_flags,
bool s2idle)
{
- unsigned int latency_req = 0;
+ u64 latency_req = 0;
int i, ret = 0;
for (i = 1; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
- struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->disabled || su->disable || s->exit_latency <= latency_req
- || s->exit_latency > max_latency
- || (s->flags & forbidden_flags)
- || (s2idle && !s->enter_s2idle))
+ if (dev->states_usage[i].disable ||
+ s->exit_latency_ns <= latency_req ||
+ s->exit_latency_ns > max_latency_ns ||
+ (s->flags & forbidden_flags) ||
+ (s2idle && !s->enter_s2idle))
continue;
- latency_req = s->exit_latency;
+ latency_req = s->exit_latency_ns;
ret = i;
}
return ret;
}
/**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_deepest_state - Set/unset governor override mode.
+ * @latency_limit_ns: Idle state exit latency limit (or no override if 0).
*
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
+ * state with exit latency within @latency_limit_ns (override governors going
+ * forward), or do not override governors if it is zero.
*/
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
struct cpuidle_device *dev;
preempt_disable();
dev = cpuidle_get_device();
if (dev)
- dev->use_deepest_state = enable;
+ dev->forced_idle_latency_limit_ns = latency_limit_ns;
preempt_enable();
}
@@ -122,9 +123,10 @@ void cpuidle_use_deepest_state(bool enable)
* @dev: cpuidle device for the given CPU.
*/
int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ u64 latency_limit_ns)
{
- return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+ return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
}
#ifdef CONFIG_SUSPEND
@@ -180,7 +182,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* that interrupts won't be enabled when it exits and allows the tick to
* be frozen safely.
*/
- index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
+ index = find_deepest_state(drv, dev, U64_MAX, 0, true);
if (index > 0)
enter_s2idle_proper(drv, dev, index);
@@ -209,7 +211,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* CPU as a broadcast timer, this call may fail if it is not available.
*/
if (broadcast && tick_broadcast_enter()) {
- index = find_deepest_state(drv, dev, target_state->exit_latency,
+ index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
CPUIDLE_FLAG_TIMER_STOP, false);
if (index < 0) {
default_idle_call();
@@ -247,7 +249,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
local_irq_enable();
if (entered_state >= 0) {
- s64 diff, delay = drv->states[entered_state].exit_latency;
+ s64 diff, delay = drv->states[entered_state].exit_latency_ns;
int i;
/*
@@ -255,18 +257,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* This can be moved to within driver enter routine,
* but that results in multiple copies of same code.
*/
- diff = ktime_us_delta(time_end, time_start);
- if (diff > INT_MAX)
- diff = INT_MAX;
+ diff = ktime_sub(time_end, time_start);
- dev->last_residency = (int)diff;
- dev->states_usage[entered_state].time += dev->last_residency;
+ dev->last_residency_ns = diff;
+ dev->states_usage[entered_state].time_ns += diff;
dev->states_usage[entered_state].usage++;
- if (diff < drv->states[entered_state].target_residency) {
+ if (diff < drv->states[entered_state].target_residency_ns) {
for (i = entered_state - 1; i >= 0; i--) {
- if (drv->states[i].disabled ||
- dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable)
continue;
/* Shallower states are enabled, so update. */
@@ -275,22 +274,21 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
}
} else if (diff > delay) {
for (i = entered_state + 1; i < drv->state_count; i++) {
- if (drv->states[i].disabled ||
- dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable)
continue;
/*
* Update if a deeper state would have been a
* better match for the observed idle duration.
*/
- if (diff - delay >= drv->states[i].target_residency)
+ if (diff - delay >= drv->states[i].target_residency_ns)
dev->states_usage[entered_state].below++;
break;
}
}
} else {
- dev->last_residency = 0;
+ dev->last_residency_ns = 0;
}
return entered_state;
@@ -380,10 +378,10 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
limit_ns = TICK_NSEC;
for (i = 1; i < drv->state_count; i++) {
- if (drv->states[i].disabled || dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable)
continue;
- limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+ limit_ns = (u64)drv->states[i].target_residency_ns;
}
dev->poll_limit_ns = limit_ns;
@@ -554,7 +552,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
static void __cpuidle_device_init(struct cpuidle_device *dev)
{
memset(dev->states_usage, 0, sizeof(dev->states_usage));
- dev->last_residency = 0;
+ dev->last_residency_ns = 0;
dev->next_hrtimer = 0;
}
@@ -567,12 +565,16 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
*/
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
- int ret;
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int i, ret;
if (!try_module_get(drv->owner))
return -EINVAL;
+ for (i = 0; i < drv->state_count; i++)
+ if (drv->states[i].disabled)
+ dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+
per_cpu(cpuidle_devices, dev->cpu) = dev;
list_add(&dev->device_list, &cpuidle_detected_devices);
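
Since the core now keeps the measured idle span as a ktime_sub() result in nanoseconds and compares it directly with the *_ns state fields, the above/below statistics no longer round through microseconds. A minimal user-space sketch of that accounting follows; the state values are made up and the per-CPU disable checks of the real code are left out.

#include <stdint.h>
#include <stdio.h>

struct state { int64_t target_residency_ns, exit_latency_ns; };
struct usage { uint64_t above, below; };

/* Toy post-idle accounting.  "above": the chosen state was deeper than the
 * observed residency justified.  "below": a deeper state would still have
 * met its target residency after paying its exit latency.  The kernel code
 * also checks that the shallower/deeper state is enabled; omitted here. */
static void account(const struct state *s, struct usage *u, int nstates,
                    int entered, int64_t diff_ns)
{
        int64_t delay = s[entered].exit_latency_ns;
        int i;

        if (diff_ns < s[entered].target_residency_ns) {
                u[entered].above++;
        } else if (diff_ns > delay) {
                for (i = entered + 1; i < nstates; i++) {
                        if (diff_ns - delay >= s[i].target_residency_ns) {
                                u[entered].below++;
                                break;
                        }
                }
        }
}

int main(void)
{
        struct state s[3] = { {0, 0}, {200000, 50000}, {2000000, 500000} };
        struct usage u[3] = { {0, 0}, {0, 0}, {0, 0} };

        account(s, u, 3, 1, 3000000);   /* 3 ms of residency in a 200 us state */
        printf("state1: above=%llu below=%llu\n",
               (unsigned long long)u[1].above, (unsigned long long)u[1].below);
        return 0;
}
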
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 80c1a830d991..c76423aaef4d 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -62,24 +62,23 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
* __cpuidle_set_driver - set per CPU driver variables for the given driver.
* @drv: a valid pointer to a struct cpuidle_driver
*
- * For each CPU in the driver's cpumask, unset the registered driver per CPU
- * to @drv.
- *
- * Returns 0 on success, -EBUSY if the CPUs have driver(s) already.
+ * Returns 0 on success, -EBUSY if any CPU in the cpumask has a driver
+ * different from drv already.
*/
static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
{
int cpu;
for_each_cpu(cpu, drv->cpumask) {
+ struct cpuidle_driver *old_drv;
- if (__cpuidle_get_cpu_driver(cpu)) {
- __cpuidle_unset_driver(drv);
+ old_drv = __cpuidle_get_cpu_driver(cpu);
+ if (old_drv && old_drv != drv)
return -EBUSY;
- }
+ }
+ for_each_cpu(cpu, drv->cpumask)
per_cpu(cpuidle_drivers, cpu) = drv;
- }
return 0;
}
@@ -166,16 +165,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
if (!drv->cpumask)
drv->cpumask = (struct cpumask *)cpu_possible_mask;
- /*
- * Look for the timer stop flag in the different states, so that we know
- * if the broadcast timer has to be set up. The loop is in the reverse
- * order, because usually one of the deeper states have this flag set.
- */
- for (i = drv->state_count - 1; i >= 0 ; i--) {
- if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
+ for (i = 0; i < drv->state_count; i++) {
+ struct cpuidle_state *s = &drv->states[i];
+
+ /*
+ * Look for the timer stop flag in the different states and if
+ * it is found, indicate that the broadcast timer has to be set
+ * up.
+ */
+ if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
drv->bctimer = 1;
- break;
- }
+
+ /*
+ * The core will use the target residency and exit latency
+ * values in nanoseconds, but allow drivers to provide them in
+ * microseconds too.
+ */
+ if (s->target_residency > 0)
+ s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+
+ if (s->exit_latency > 0)
+ s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
}
}
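
The init hunk above derives the nanosecond fields from the legacy microsecond ones, so existing drivers keep working unchanged. A hedged stand-alone illustration of the conversion; the struct below is a cut-down stand-in for struct cpuidle_state, and NSEC_PER_USEC is 1000.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

struct idle_state {
        unsigned int target_residency;  /* us, as provided by the driver */
        unsigned int exit_latency;      /* us, as provided by the driver */
        uint64_t target_residency_ns;   /* ns, what the core actually uses */
        uint64_t exit_latency_ns;
};

/* Mirror of the init-time fixup: when the driver provided non-zero
 * microsecond values, derive the nanosecond fields from them. */
static void fixup_state_times(struct idle_state *s)
{
        if (s->target_residency > 0)
                s->target_residency_ns = (uint64_t)s->target_residency * NSEC_PER_USEC;

        if (s->exit_latency > 0)
                s->exit_latency_ns = (uint64_t)s->exit_latency * NSEC_PER_USEC;
}

int main(void)
{
        struct idle_state c6 = { .target_residency = 600, .exit_latency = 85 };

        fixup_state_times(&c6);
        printf("residency=%llu ns latency=%llu ns\n",
               (unsigned long long)c6.target_residency_ns,
               (unsigned long long)c6.exit_latency_ns);
        return 0;
}
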
@@ -379,3 +389,31 @@ void cpuidle_driver_unref(void)
spin_unlock(&cpuidle_driver_lock);
}
+
+/**
+ * cpuidle_driver_state_disabled - Disable or enable an idle state
+ * @drv: cpuidle driver owning the state
+ * @idx: State index
+ * @disable: Whether or not to disable the state
+ */
+void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
+ bool disable)
+{
+ unsigned int cpu;
+
+ mutex_lock(&cpuidle_lock);
+
+ for_each_cpu(cpu, drv->cpumask) {
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (!dev)
+ continue;
+
+ if (disable)
+ dev->states_usage[idx].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+ else
+ dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_DRIVER;
+ }
+
+ mutex_unlock(&cpuidle_lock);
+}
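
With the per-CPU disable word becoming a bitmask, a driver-initiated disable and a user-initiated one can now coexist without clobbering each other. A tiny compile-and-run sketch of the pattern; the flag names and bit positions below are assumptions for illustration, the real constants are expected to live in the cpuidle headers.

#include <stdio.h>

#define DISABLED_BY_USER        (1u << 0)       /* assumed bit layout */
#define DISABLED_BY_DRIVER      (1u << 1)

int main(void)
{
        unsigned int disable = 0;

        disable |= DISABLED_BY_DRIVER;  /* drv->states[i].disabled at register time */
        disable |= DISABLED_BY_USER;    /* echo 1 > .../stateN/disable */
        disable &= ~DISABLED_BY_USER;   /* echo 0 > .../stateN/disable */

        /* Still disabled: the driver's reason survives the user's toggle. */
        printf("state usable: %s\n", disable ? "no" : "yes");
        return 0;
}
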
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index e9801f26c732..e48271e117a3 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
* cpuidle_governor_latency_req - Compute a latency constraint for CPU
* @cpu: Target CPU
*/
-int cpuidle_governor_latency_req(unsigned int cpu)
+s64 cpuidle_governor_latency_req(unsigned int cpu)
{
int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
struct device *device = get_cpu_device(cpu);
int device_req = dev_pm_qos_raw_resume_latency(device);
- return device_req < global_req ? device_req : global_req;
+ if (device_req > global_req)
+ device_req = global_req;
+
+ return (s64)device_req * NSEC_PER_USEC;
}
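
The governor-facing latency limit is now the tighter of the per-device and global PM QoS requests, scaled to nanoseconds. A minimal stand-alone equivalent, with the two request values hard-coded instead of coming from pm_qos_request() and dev_pm_qos_raw_resume_latency().

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000LL

/* Both inputs are in microseconds, as the PM QoS framework reports them. */
static int64_t latency_req_ns(int global_req_us, int device_req_us)
{
        if (device_req_us > global_req_us)
                device_req_us = global_req_us;

        return (int64_t)device_req_us * NSEC_PER_USEC;
}

int main(void)
{
        /* e.g. a global CPU_DMA_LATENCY of 200 us and a per-device limit of 50 us */
        printf("%lld ns\n", (long long)latency_req_ns(200, 50));
        return 0;
}
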
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 7a703d2e0064..cb2a96eafc02 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick)
{
- int latency_req = cpuidle_governor_latency_req(dev->cpu);
+ s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
if (!drv->state_count || latency_req == 0) {
*stop_tick = false;
@@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv,
return 0;
}
-static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
+static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
{
unsigned int val;
- u64 block_ns = block_us*NSEC_PER_USEC;
/* Grow cpu_halt_poll_us if
* cpu_halt_poll_us < block_ns < guest_halt_poll_us
@@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index)
dev->last_state_idx = index;
if (index != 0)
- adjust_poll_limit(dev, dev->last_residency);
+ adjust_poll_limit(dev, dev->last_residency_ns);
}
/**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 428eeb832fe7..8e9058c4ea63 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -27,8 +27,8 @@ struct ladder_device_state {
struct {
u32 promotion_count;
u32 demotion_count;
- u32 promotion_time;
- u32 demotion_time;
+ u64 promotion_time_ns;
+ u64 demotion_time_ns;
} threshold;
struct {
int promotion_count;
@@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv,
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;
- int last_residency, last_idx = dev->last_state_idx;
+ int last_idx = dev->last_state_idx;
int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
- int latency_req = cpuidle_governor_latency_req(dev->cpu);
+ s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+ s64 last_residency;
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0)) {
@@ -80,14 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
last_state = &ldev->states[last_idx];
- last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
+ last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
/* consider promotion */
if (last_idx < drv->state_count - 1 &&
- !drv->states[last_idx + 1].disabled &&
!dev->states_usage[last_idx + 1].disable &&
- last_residency > last_state->threshold.promotion_time &&
- drv->states[last_idx + 1].exit_latency <= latency_req) {
+ last_residency > last_state->threshold.promotion_time_ns &&
+ drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -98,13 +98,12 @@ static int ladder_select_state(struct cpuidle_driver *drv,
/* consider demotion */
if (last_idx > first_idx &&
- (drv->states[last_idx].disabled ||
- dev->states_usage[last_idx].disable ||
- drv->states[last_idx].exit_latency > latency_req)) {
+ (dev->states_usage[last_idx].disable ||
+ drv->states[last_idx].exit_latency_ns > latency_req)) {
int i;
for (i = last_idx - 1; i > first_idx; i--) {
- if (drv->states[i].exit_latency <= latency_req)
+ if (drv->states[i].exit_latency_ns <= latency_req)
break;
}
ladder_do_selection(dev, ldev, last_idx, i);
@@ -112,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
}
if (last_idx > first_idx &&
- last_residency < last_state->threshold.demotion_time) {
+ last_residency < last_state->threshold.demotion_time_ns) {
last_state->stats.demotion_count++;
last_state->stats.promotion_count = 0;
if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
@@ -152,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
lstate->threshold.demotion_count = DEMOTION_COUNT;
if (i < drv->state_count - 1)
- lstate->threshold.promotion_time = state->exit_latency;
+ lstate->threshold.promotion_time_ns = state->exit_latency_ns;
if (i > first_idx)
- lstate->threshold.demotion_time = state->exit_latency;
+ lstate->threshold.demotion_time_ns = state->exit_latency_ns;
}
return 0;
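
The ladder thresholds only change units, but the promotion/demotion mechanism they feed is easy to miss in the diff. A rough user-space model of one rung; the promotion and demotion counts here are placeholder values rather than the governor's real tunables.

#include <stdint.h>
#include <stdio.h>

/* Placeholder counts, for illustration only. */
#define PROMOTION_COUNT 4
#define DEMOTION_COUNT  1

struct ladder_state {
        int64_t promotion_time_ns, demotion_time_ns;
        unsigned int promotion_count, demotion_count;
};

/* Returns +1 to go one state deeper, -1 to go shallower, 0 to stay put. */
static int ladder_step(struct ladder_state *st, int64_t last_residency_ns)
{
        if (last_residency_ns > st->promotion_time_ns) {
                st->demotion_count = 0;
                if (++st->promotion_count >= PROMOTION_COUNT) {
                        st->promotion_count = 0;
                        return 1;
                }
        } else if (last_residency_ns < st->demotion_time_ns) {
                st->promotion_count = 0;
                if (++st->demotion_count >= DEMOTION_COUNT) {
                        st->demotion_count = 0;
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        struct ladder_state st = { .promotion_time_ns = 85000, .demotion_time_ns = 85000 };
        int step = 0;

        /* Four long residencies in a row finally trigger a promotion. */
        for (int i = 0; i < 4; i++)
                step = ladder_step(&st, 500000);
        printf("step=%d\n", step);
        return 0;
}
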
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e5a5d0c8d66b..b0a7ad566081 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,22 +19,12 @@
#include <linux/sched/stat.h>
#include <linux/math64.h>
-/*
- * Please note when changing the tuning values:
- * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
- * a scaling operation multiplication may overflow on 32 bit platforms.
- * In that case, #define RESOLUTION as ULL to get 64 bit result:
- * #define RESOLUTION 1024ULL
- *
- * The default values do not overflow.
- */
#define BUCKETS 12
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
#define RESOLUTION 1024
#define DECAY 8
-#define MAX_INTERESTING 50000
-
+#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
/*
* Concepts and ideas behind the menu governor
@@ -120,14 +110,14 @@ struct menu_device {
int needs_update;
int tick_wakeup;
- unsigned int next_timer_us;
+ u64 next_timer_ns;
unsigned int bucket;
unsigned int correction_factor[BUCKETS];
unsigned int intervals[INTERVALS];
int interval_ptr;
};
-static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
{
int bucket = 0;
@@ -140,15 +130,15 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
if (nr_iowaiters)
bucket = BUCKETS/2;
- if (duration < 10)
+ if (duration_ns < 10ULL * NSEC_PER_USEC)
return bucket;
- if (duration < 100)
+ if (duration_ns < 100ULL * NSEC_PER_USEC)
return bucket + 1;
- if (duration < 1000)
+ if (duration_ns < 1000ULL * NSEC_PER_USEC)
return bucket + 2;
- if (duration < 10000)
+ if (duration_ns < 10000ULL * NSEC_PER_USEC)
return bucket + 3;
- if (duration < 100000)
+ if (duration_ns < 100000ULL * NSEC_PER_USEC)
return bucket + 4;
return bucket + 5;
}
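
which_bucket() keeps its decade thresholds, just expressed in nanoseconds. A user-space copy that can be experimented with directly; BUCKETS is 12 as in the governor, so I/O-bound wakeups land in the upper six buckets.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC   1000ULL
#define BUCKETS         12

static int which_bucket(uint64_t duration_ns, unsigned long nr_iowaiters)
{
        int bucket = 0;

        /* I/O-bound wakeups get their own set of buckets in the upper half. */
        if (nr_iowaiters)
                bucket = BUCKETS / 2;

        if (duration_ns < 10 * NSEC_PER_USEC)
                return bucket;
        if (duration_ns < 100 * NSEC_PER_USEC)
                return bucket + 1;
        if (duration_ns < 1000 * NSEC_PER_USEC)
                return bucket + 2;
        if (duration_ns < 10000 * NSEC_PER_USEC)
                return bucket + 3;
        if (duration_ns < 100000 * NSEC_PER_USEC)
                return bucket + 4;
        return bucket + 5;
}

int main(void)
{
        /* A 250 us sleep lands in bucket 2, or 8 if I/O waiters are present. */
        printf("%d %d\n", which_bucket(250 * NSEC_PER_USEC, 0),
               which_bucket(250 * NSEC_PER_USEC, 1));
        return 0;
}
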
@@ -276,13 +266,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
- int latency_req = cpuidle_governor_latency_req(dev->cpu);
- int i;
- int idx;
- unsigned int interactivity_req;
+ s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
unsigned int predicted_us;
+ u64 predicted_ns;
+ u64 interactivity_req;
unsigned long nr_iowaiters;
ktime_t delta_next;
+ int i, idx;
if (data->needs_update) {
menu_update(drv, dev);
@@ -290,15 +280,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
/* determine the expected residency time, round up */
- data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
+ data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
nr_iowaiters = nr_iowait_cpu(dev->cpu);
- data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+ data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
- ((data->next_timer_us < drv->states[1].target_residency ||
- latency_req < drv->states[1].exit_latency) &&
- !drv->states[0].disabled && !dev->states_usage[0].disable)) {
+ ((data->next_timer_ns < drv->states[1].target_residency_ns ||
+ latency_req < drv->states[1].exit_latency_ns) &&
+ !dev->states_usage[0].disable)) {
/*
* In this case state[0] will be used no matter what, so return
* it right away and keep the tick running if state[0] is a
@@ -308,18 +298,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return 0;
}
- /*
- * Force the result of multiplication to be 64 bits even if both
- * operands are 32 bits.
- * Make sure to round up for half microseconds.
- */
- predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
- data->correction_factor[data->bucket],
- RESOLUTION * DECAY);
- /*
- * Use the lowest expected idle interval to pick the idle state.
- */
- predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
+ /* Round up the result for half microseconds. */
+ predicted_us = div_u64(data->next_timer_ns *
+ data->correction_factor[data->bucket] +
+ (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
+ RESOLUTION * DECAY * NSEC_PER_USEC);
+ /* Use the lowest expected idle interval to pick the idle state. */
+ predicted_ns = (u64)min(predicted_us,
+ get_typical_interval(data, predicted_us)) *
+ NSEC_PER_USEC;
if (tick_nohz_tick_stopped()) {
/*
@@ -330,14 +317,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* the known time till the closest timer event for the idle
* state selection.
*/
- if (predicted_us < TICK_USEC)
- predicted_us = ktime_to_us(delta_next);
+ if (predicted_ns < TICK_NSEC)
+ predicted_ns = delta_next;
} else {
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
- interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
+ interactivity_req = div64_u64(predicted_ns,
+ performance_multiplier(nr_iowaiters));
if (latency_req > interactivity_req)
latency_req = interactivity_req;
}
@@ -349,27 +337,26 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
idx = -1;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
- struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->disabled || su->disable)
+ if (dev->states_usage[i].disable)
continue;
if (idx == -1)
idx = i; /* first enabled state */
- if (s->target_residency > predicted_us) {
+ if (s->target_residency_ns > predicted_ns) {
/*
* Use a physical idle state, not busy polling, unless
* a timer is going to trigger soon enough.
*/
if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
- s->exit_latency <= latency_req &&
- s->target_residency <= data->next_timer_us) {
- predicted_us = s->target_residency;
+ s->exit_latency_ns <= latency_req &&
+ s->target_residency_ns <= data->next_timer_ns) {
+ predicted_ns = s->target_residency_ns;
idx = i;
break;
}
- if (predicted_us < TICK_USEC)
+ if (predicted_ns < TICK_NSEC)
break;
if (!tick_nohz_tick_stopped()) {
@@ -379,7 +366,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick in that case and let the governor run
* again in the next iteration of the loop.
*/
- predicted_us = drv->states[idx].target_residency;
+ predicted_ns = drv->states[idx].target_residency_ns;
break;
}
@@ -389,13 +376,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* closest timer event, select this one to avoid getting
* stuck in the shallow one for too long.
*/
- if (drv->states[idx].target_residency < TICK_USEC &&
- s->target_residency <= ktime_to_us(delta_next))
+ if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+ s->target_residency_ns <= delta_next)
idx = i;
return idx;
}
- if (s->exit_latency > latency_req)
+ if (s->exit_latency_ns > latency_req)
break;
idx = i;
@@ -409,12 +396,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration is shorter than the tick period length.
*/
if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
- unsigned int delta_next_us = ktime_to_us(delta_next);
-
+ predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
- if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
+ if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
@@ -422,12 +407,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick, so try to correct that.
*/
for (i = idx - 1; i >= 0; i--) {
- if (drv->states[i].disabled ||
- dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable)
continue;
idx = i;
- if (drv->states[i].target_residency <= delta_next_us)
+ if (drv->states[i].target_residency_ns <= delta_next)
break;
}
}
@@ -463,7 +447,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
struct menu_device *data = this_cpu_ptr(&menu_devices);
int last_idx = dev->last_state_idx;
struct cpuidle_state *target = &drv->states[last_idx];
- unsigned int measured_us;
+ u64 measured_ns;
unsigned int new_factor;
/*
@@ -481,7 +465,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* assume the state was never reached and the exit latency is 0.
*/
- if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+ if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
/*
* The nohz code said that there wouldn't be any events within
* the tick boundary (if the tick was stopped), but the idle
@@ -491,7 +475,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* have been idle long (but not forever) to help the idle
* duration predictor do a better job next time.
*/
- measured_us = 9 * MAX_INTERESTING / 10;
+ measured_ns = 9 * MAX_INTERESTING / 10;
} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
dev->poll_time_limit) {
/*
@@ -501,28 +485,29 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* the CPU might have been woken up from idle by the next timer.
* Assume that to be the case.
*/
- measured_us = data->next_timer_us;
+ measured_ns = data->next_timer_ns;
} else {
/* measured value */
- measured_us = dev->last_residency;
+ measured_ns = dev->last_residency_ns;
/* Deduct exit latency */
- if (measured_us > 2 * target->exit_latency)
- measured_us -= target->exit_latency;
+ if (measured_ns > 2 * target->exit_latency_ns)
+ measured_ns -= target->exit_latency_ns;
else
- measured_us /= 2;
+ measured_ns /= 2;
}
/* Make sure our coefficients do not exceed unity */
- if (measured_us > data->next_timer_us)
- measured_us = data->next_timer_us;
+ if (measured_ns > data->next_timer_ns)
+ measured_ns = data->next_timer_ns;
/* Update our correction ratio */
new_factor = data->correction_factor[data->bucket];
new_factor -= new_factor / DECAY;
- if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
- new_factor += RESOLUTION * measured_us / data->next_timer_us;
+ if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
+ new_factor += div64_u64(RESOLUTION * measured_ns,
+ data->next_timer_ns);
else
/*
* we were idle so long that we count it as a perfect
@@ -542,7 +527,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket] = new_factor;
/* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = measured_us;
+ data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
if (data->interval_ptr >= INTERVALS)
data->interval_ptr = 0;
}
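
The menu prediction is next_timer_ns scaled by a per-bucket correction factor held in RESOLUTION * DECAY fixed point, rounded to whole microseconds, and the factor itself is an exponentially decaying average of measured over expected sleep time. A worked stand-alone version of both steps; div64_u64() is replaced by plain 64-bit division, and the MAX_INTERESTING clamp and get_typical_interval() pass are omitted.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC   1000ULL
#define RESOLUTION      1024u
#define DECAY           8u

/* predicted_us = next_timer_ns * factor / (RESOLUTION * DECAY * 1000),
 * rounded to the nearest microsecond. */
static unsigned int predict_us(uint64_t next_timer_ns, unsigned int factor)
{
        uint64_t div = (uint64_t)RESOLUTION * DECAY * NSEC_PER_USEC;

        return (unsigned int)((next_timer_ns * factor + div / 2) / div);
}

/* The factor decays by 1/DECAY and gains the latest measured/expected
 * ratio scaled by RESOLUTION. */
static unsigned int update_factor(unsigned int factor, uint64_t measured_ns,
                                  uint64_t next_timer_ns)
{
        unsigned int new_factor = factor - factor / DECAY;

        if (next_timer_ns > 0)
                new_factor += (unsigned int)(RESOLUTION * measured_ns / next_timer_ns);
        else
                new_factor += RESOLUTION;

        return new_factor;
}

int main(void)
{
        unsigned int factor = RESOLUTION * DECAY;       /* start at ratio 1.0 */
        uint64_t expected_ns = 500 * NSEC_PER_USEC;

        /* The CPU keeps waking after ~100 us, so the factor drifts down ... */
        for (int i = 0; i < 8; i++)
                factor = update_factor(factor, 100 * NSEC_PER_USEC, expected_ns);

        /* ... and the next 500 us sleep is predicted to be much shorter. */
        printf("factor=%u predicted=%u us\n", factor, predict_us(expected_ns, factor));
        return 0;
}
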
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index b5a0e498f798..de7e706efd46 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -104,7 +104,7 @@ struct teo_cpu {
u64 sleep_length_ns;
struct teo_idle_state states[CPUIDLE_STATE_MAX];
int interval_idx;
- unsigned int intervals[INTERVALS];
+ u64 intervals[INTERVALS];
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
- unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
int i, idx_hit = -1, idx_timer = -1;
- unsigned int measured_us;
+ u64 measured_ns;
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
/*
@@ -127,23 +126,28 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* enough to the closest timer event expected at the idle state
* selection time to be discarded.
*/
- measured_us = UINT_MAX;
+ measured_ns = U64_MAX;
} else {
- unsigned int lat;
+ u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
- lat = drv->states[dev->last_state_idx].exit_latency;
-
- measured_us = ktime_to_us(cpu_data->time_span_ns);
+ /*
+ * The computations below are to determine whether or not the
+ * (saved) time till the next timer event and the measured idle
+ * duration fall into the same "bin", so use last_residency_ns
+ * for that instead of time_span_ns which includes the cpuidle
+ * overhead.
+ */
+ measured_ns = dev->last_residency_ns;
/*
* The delay between the wakeup and the first instruction
* executed by the CPU is not likely to be worst-case every
* time, so take 1/2 of the exit latency as a very rough
* approximation of the average of it.
*/
- if (measured_us >= lat)
- measured_us -= lat / 2;
+ if (measured_ns >= lat_ns)
+ measured_ns -= lat_ns / 2;
else
- measured_us /= 2;
+ measured_ns /= 2;
}
/*
@@ -155,9 +159,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
- if (drv->states[i].target_residency <= sleep_length_us) {
+ if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
idx_timer = i;
- if (drv->states[i].target_residency <= measured_us)
+ if (drv->states[i].target_residency_ns <= measured_ns)
idx_hit = i;
}
}
@@ -193,30 +197,35 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* Save idle duration values corresponding to non-timer wakeups for
* pattern detection.
*/
- cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
+ cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
if (cpu_data->interval_idx > INTERVALS)
cpu_data->interval_idx = 0;
}
+static bool teo_time_ok(u64 interval_ns)
+{
+ return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
+}
+
/**
* teo_find_shallower_state - Find shallower idle state matching given duration.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
- * @duration_us: Idle duration value to match.
+ * @duration_ns: Idle duration value to match.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- unsigned int duration_us)
+ u64 duration_ns)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
- if (drv->states[i].disabled || dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable)
continue;
state_idx = i;
- if (drv->states[i].target_residency <= duration_us)
+ if (drv->states[i].target_residency_ns <= duration_ns)
break;
}
return state_idx;
@@ -232,9 +241,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
- int latency_req = cpuidle_governor_latency_req(dev->cpu);
- unsigned int duration_us, count;
- int max_early_idx, constraint_idx, idx, i;
+ s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+ u64 duration_ns;
+ unsigned int hits, misses, early_hits;
+ int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
ktime_t delta_tick;
if (dev->last_state_idx >= 0) {
@@ -244,50 +254,92 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
cpu_data->time_span_ns = local_clock();
- cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
- duration_us = ktime_to_us(cpu_data->sleep_length_ns);
+ duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+ cpu_data->sleep_length_ns = duration_ns;
- count = 0;
+ hits = 0;
+ misses = 0;
+ early_hits = 0;
max_early_idx = -1;
+ prev_max_early_idx = -1;
constraint_idx = drv->state_count;
idx = -1;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
- struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->disabled || su->disable) {
+ if (dev->states_usage[i].disable) {
+ /*
+ * Ignore disabled states with target residencies beyond
+ * the anticipated idle duration.
+ */
+ if (s->target_residency_ns > duration_ns)
+ continue;
+
+ /*
+ * This state is disabled, so the range of idle duration
+ * values corresponding to it is covered by the current
+ * candidate state, but still the "hits" and "misses"
+ * metrics of the disabled state need to be used to
+ * decide whether or not the state covering the range in
+ * question is good enough.
+ */
+ hits = cpu_data->states[i].hits;
+ misses = cpu_data->states[i].misses;
+
+ if (early_hits >= cpu_data->states[i].early_hits ||
+ idx < 0)
+ continue;
+
/*
- * If the "early hits" metric of a disabled state is
- * greater than the current maximum, it should be taken
- * into account, because it would be a mistake to select
- * a deeper state with lower "early hits" metric. The
- * index cannot be changed to point to it, however, so
- * just increase the max count alone and let the index
- * still point to a shallower idle state.
+ * If the current candidate state has been the one with
+ * the maximum "early hits" metric so far, the "early
+ * hits" metric of the disabled state replaces the
+ * current "early hits" count to avoid selecting a
+ * deeper state with lower "early hits" metric.
*/
- if (max_early_idx >= 0 &&
- count < cpu_data->states[i].early_hits)
- count = cpu_data->states[i].early_hits;
+ if (max_early_idx == idx) {
+ early_hits = cpu_data->states[i].early_hits;
+ continue;
+ }
+
+ /*
+ * The current candidate state is closer to the disabled
+ * one than the current maximum "early hits" state, so
+ * replace the latter with it, but in case the maximum
+ * "early hits" state index has not been set so far,
+ * check if the current candidate state is not too
+ * shallow for that role.
+ */
+ if (teo_time_ok(drv->states[idx].target_residency_ns)) {
+ prev_max_early_idx = max_early_idx;
+ early_hits = cpu_data->states[i].early_hits;
+ max_early_idx = idx;
+ }
continue;
}
- if (idx < 0)
+ if (idx < 0) {
idx = i; /* first enabled state */
+ hits = cpu_data->states[i].hits;
+ misses = cpu_data->states[i].misses;
+ }
- if (s->target_residency > duration_us)
+ if (s->target_residency_ns > duration_ns)
break;
- if (s->exit_latency > latency_req && constraint_idx > i)
+ if (s->exit_latency_ns > latency_req && constraint_idx > i)
constraint_idx = i;
idx = i;
+ hits = cpu_data->states[i].hits;
+ misses = cpu_data->states[i].misses;
- if (count < cpu_data->states[i].early_hits &&
- !(tick_nohz_tick_stopped() &&
- drv->states[i].target_residency < TICK_USEC)) {
- count = cpu_data->states[i].early_hits;
+ if (early_hits < cpu_data->states[i].early_hits &&
+ teo_time_ok(drv->states[i].target_residency_ns)) {
+ prev_max_early_idx = max_early_idx;
+ early_hits = cpu_data->states[i].early_hits;
max_early_idx = i;
}
}
@@ -300,10 +352,19 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* "early hits" metric, but if that cannot be determined, just use the
* state selected so far.
*/
- if (cpu_data->states[idx].hits <= cpu_data->states[idx].misses &&
- max_early_idx >= 0) {
- idx = max_early_idx;
- duration_us = drv->states[idx].target_residency;
+ if (hits <= misses) {
+ /*
+ * The current candidate state is not suitable, so take the one
+ * whose "early hits" metric is the maximum for the range of
+ * shallower states.
+ */
+ if (idx == max_early_idx)
+ max_early_idx = prev_max_early_idx;
+
+ if (max_early_idx >= 0) {
+ idx = max_early_idx;
+ duration_ns = drv->states[idx].target_residency_ns;
+ }
}
/*
@@ -316,18 +377,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx < 0) {
idx = 0; /* No states enabled. Must use 0. */
} else if (idx > 0) {
+ unsigned int count = 0;
u64 sum = 0;
- count = 0;
-
/*
* Count and sum the most recent idle duration values less than
* the current expected idle duration value.
*/
for (i = 0; i < INTERVALS; i++) {
- unsigned int val = cpu_data->intervals[i];
+ u64 val = cpu_data->intervals[i];
- if (val >= duration_us)
+ if (val >= duration_ns)
continue;
count++;
@@ -339,17 +399,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* values are in the interesting range.
*/
if (count > INTERVALS / 2) {
- unsigned int avg_us = div64_u64(sum, count);
+ u64 avg_ns = div64_u64(sum, count);
/*
* Avoid spending too much time in an idle state that
* would be too shallow.
*/
- if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
- duration_us = avg_us;
- if (drv->states[idx].target_residency > avg_us)
+ if (teo_time_ok(avg_ns)) {
+ duration_ns = avg_ns;
+ if (drv->states[idx].target_residency_ns > avg_ns)
idx = teo_find_shallower_state(drv, dev,
- idx, avg_us);
+ idx, avg_ns);
}
}
}
@@ -359,9 +419,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration is shorter than the tick period length.
*/
if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
- unsigned int delta_tick_us = ktime_to_us(delta_tick);
-
+ duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
/*
@@ -370,8 +428,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* till the closest timer including the tick, try to correct
* that.
*/
- if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
+ if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
}
return idx;
@@ -415,7 +473,7 @@ static int teo_enable_device(struct cpuidle_driver *drv,
memset(cpu_data, 0, sizeof(*cpu_data));
for (i = 0; i < INTERVALS; i++)
- cpu_data->intervals[i] = UINT_MAX;
+ cpu_data->intervals[i] = U64_MAX;
return 0;
}
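
teo's pattern detector stores raw nanosecond intervals now: it counts the recent wakeups that came in sooner than the expected sleep and, if more than half of them did, falls back to their average. A small stand-alone rendition of that step, with INTERVALS assumed to be 8 and the tick/teo_time_ok() checks left out.

#include <stdint.h>
#include <stdio.h>

#define INTERVALS 8

/* If more than half of the recorded idle intervals were shorter than the
 * expected sleep length, return their average; otherwise keep the original
 * expectation.  UINT64_MAX entries (timer wakeups) never count. */
static uint64_t refine_duration_ns(const uint64_t *intervals, uint64_t duration_ns)
{
        uint64_t sum = 0;
        unsigned int count = 0;

        for (int i = 0; i < INTERVALS; i++) {
                uint64_t val = intervals[i];

                if (val >= duration_ns)
                        continue;

                count++;
                sum += val;
        }

        if (count > INTERVALS / 2)
                return sum / count;

        return duration_ns;
}

int main(void)
{
        /* Five of the last eight wakeups came well before the 2 ms timer. */
        uint64_t intervals[INTERVALS] = {
                150000, 180000, 90000, UINT64_MAX, 200000, UINT64_MAX, 120000, UINT64_MAX
        };

        printf("%llu ns\n",
               (unsigned long long)refine_duration_ns(intervals, 2000000));
        return 0;
}
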
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index c8fa5f41dfc4..9f1ace9c53da 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -49,6 +49,8 @@ void cpuidle_poll_state_init(struct cpuidle_driver *drv)
snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
state->exit_latency = 0;
state->target_residency = 0;
+ state->exit_latency_ns = 0;
+ state->target_residency_ns = 0;
state->power_usage = -1;
state->enter = poll_idle;
state->disabled = false;
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 2bb2683b493c..38ef770be90d 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -255,25 +255,6 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%u\n", state->_name);\
}
-#define define_store_state_ull_function(_name) \
-static ssize_t store_state_##_name(struct cpuidle_state *state, \
- struct cpuidle_state_usage *state_usage, \
- const char *buf, size_t size) \
-{ \
- unsigned long long value; \
- int err; \
- if (!capable(CAP_SYS_ADMIN)) \
- return -EPERM; \
- err = kstrtoull(buf, 0, &value); \
- if (err) \
- return err; \
- if (value) \
- state_usage->_name = 1; \
- else \
- state_usage->_name = 0; \
- return size; \
-}
-
#define define_show_state_ull_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
struct cpuidle_state_usage *state_usage, \
@@ -292,18 +273,60 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
return sprintf(buf, "%s\n", state->_name);\
}
-define_show_state_function(exit_latency)
-define_show_state_function(target_residency)
+#define define_show_state_time_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
+}
+
+define_show_state_time_function(exit_latency)
+define_show_state_time_function(target_residency)
define_show_state_function(power_usage)
define_show_state_ull_function(usage)
-define_show_state_ull_function(time)
define_show_state_str_function(name)
define_show_state_str_function(desc)
-define_show_state_ull_function(disable)
-define_store_state_ull_function(disable)
define_show_state_ull_function(above)
define_show_state_ull_function(below)
+static ssize_t show_state_time(struct cpuidle_state *state,
+ struct cpuidle_state_usage *state_usage,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
+}
+
+static ssize_t show_state_disable(struct cpuidle_state *state,
+ struct cpuidle_state_usage *state_usage,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ state_usage->disable & CPUIDLE_STATE_DISABLED_BY_USER);
+}
+
+static ssize_t store_state_disable(struct cpuidle_state *state,
+ struct cpuidle_state_usage *state_usage,
+ const char *buf, size_t size)
+{
+ unsigned int value;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ err = kstrtouint(buf, 0, &value);
+ if (err)
+ return err;
+
+ if (value)
+ state_usage->disable |= CPUIDLE_STATE_DISABLED_BY_USER;
+ else
+ state_usage->disable &= ~CPUIDLE_STATE_DISABLED_BY_USER;
+
+ return size;
+}
+
define_one_state_ro(name, show_state_name);
define_one_state_ro(desc, show_state_desc);
define_one_state_ro(latency, show_state_exit_latency);
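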
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 446490c9d635..f840e61e5a27 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -160,6 +160,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
int lev, prev_lev, ret = 0;
unsigned long cur_time;
+ lockdep_assert_held(&devfreq->lock);
cur_time = jiffies;
/* Immediately exit if previous_freq is not initialized yet. */
@@ -409,6 +410,9 @@ static void devfreq_monitor(struct work_struct *work)
*/
void devfreq_monitor_start(struct devfreq *devfreq)
{
+ if (devfreq->governor->interrupt_driven)
+ return;
+
INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
if (devfreq->profile->polling_ms)
queue_delayed_work(devfreq_wq, &devfreq->work,
@@ -426,6 +430,9 @@ EXPORT_SYMBOL(devfreq_monitor_start);
*/
void devfreq_monitor_stop(struct devfreq *devfreq)
{
+ if (devfreq->governor->interrupt_driven)
+ return;
+
cancel_delayed_work_sync(&devfreq->work);
}
EXPORT_SYMBOL(devfreq_monitor_stop);
@@ -453,6 +460,10 @@ void devfreq_monitor_suspend(struct devfreq *devfreq)
devfreq_update_status(devfreq, devfreq->previous_freq);
devfreq->stop_polling = true;
mutex_unlock(&devfreq->lock);
+
+ if (devfreq->governor->interrupt_driven)
+ return;
+
cancel_delayed_work_sync(&devfreq->work);
}
EXPORT_SYMBOL(devfreq_monitor_suspend);
@@ -473,11 +484,15 @@ void devfreq_monitor_resume(struct devfreq *devfreq)
if (!devfreq->stop_polling)
goto out;
+ if (devfreq->governor->interrupt_driven)
+ goto out_update;
+
if (!delayed_work_pending(&devfreq->work) &&
devfreq->profile->polling_ms)
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
+out_update:
devfreq->last_stat_updated = jiffies;
devfreq->stop_polling = false;
@@ -509,6 +524,9 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay)
if (devfreq->stop_polling)
goto out;
+ if (devfreq->governor->interrupt_driven)
+ goto out;
+
/* if new delay is zero, stop polling */
if (!new_delay) {
mutex_unlock(&devfreq->lock);
@@ -625,7 +643,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq = find_device_devfreq(dev);
mutex_unlock(&devfreq_list_lock);
if (!IS_ERR(devfreq)) {
- dev_err(dev, "%s: Unable to create devfreq for the device.\n",
+ dev_err(dev, "%s: devfreq device already exists!\n",
__func__);
err = -EINVAL;
goto err_out;
@@ -1195,7 +1213,7 @@ static ssize_t available_governors_show(struct device *d,
* The devfreq with immutable governor (e.g., passive) shows
* only own governor.
*/
- if (df->governor->immutable) {
+ if (df->governor && df->governor->immutable) {
count = scnprintf(&buf[count], DEVFREQ_NAME_LEN,
"%s ", df->governor_name);
/*
@@ -1397,12 +1415,17 @@ static ssize_t trans_stat_show(struct device *dev,
int i, j;
unsigned int max_state = devfreq->profile->max_state;
- if (!devfreq->stop_polling &&
- devfreq_update_status(devfreq, devfreq->previous_freq))
- return 0;
if (max_state == 0)
return sprintf(buf, "Not Supported.\n");
+ mutex_lock(&devfreq->lock);
+ if (!devfreq->stop_polling &&
+ devfreq_update_status(devfreq, devfreq->previous_freq)) {
+ mutex_unlock(&devfreq->lock);
+ return 0;
+ }
+ mutex_unlock(&devfreq->lock);
+
len = sprintf(buf, " From : To\n");
len += sprintf(buf + len, " :");
for (i = 0; i < max_state; i++)
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 87b42055e6bc..85c7a77bf3f0 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -673,7 +673,6 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
for (i = 0; i < info->num_events; i++) {
edev[i] = devm_devfreq_event_add_edev(&pdev->dev, &desc[i]);
if (IS_ERR(edev[i])) {
- ret = PTR_ERR(edev[i]);
dev_err(&pdev->dev,
"failed to add devfreq-event device\n");
return PTR_ERR(edev[i]);
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index bbe5ff9fcecf..dc7533ccc3db 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -31,6 +31,8 @@
* @name: Governor's name
* @immutable: Immutable flag for governor. If the value is 1,
* this governor is never changeable to another governor.
+ * @interrupt_driven: Devfreq core won't schedule polling work for this
+ * governor if value is set to 1.
* @get_target_freq: Returns desired operating frequency for the device.
* Basically, get_target_freq will run
* devfreq_dev_profile.get_dev_status() to get the
@@ -49,6 +51,7 @@ struct devfreq_governor {
const char name[DEVFREQ_NAME_LEN];
const unsigned int immutable;
+ const unsigned int interrupt_driven;
int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
int (*event_handler)(struct devfreq *devfreq,
unsigned int event, void *data);
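
The new interrupt_driven flag is what makes the devfreq core skip queuing the deferrable polling work in the hunks further up; such a governor is expected to drive update_devfreq() from its own interrupt or notifier path. The snippet below is a hypothetical user-space model of that split, not an excerpt from the kernel.

#include <stdio.h>

/* Cut-down model of the governor/monitor interaction: when a governor marks
 * itself interrupt_driven, the core's monitor helpers do nothing and the
 * governor updates the frequency from its own interrupt handling instead. */
struct governor {
        const char *name;
        unsigned int interrupt_driven;
};

static void monitor_start(const struct governor *gov, unsigned int polling_ms)
{
        if (gov->interrupt_driven) {
                printf("%s: no polling work queued\n", gov->name);
                return;
        }
        printf("%s: polling every %u ms\n", gov->name, polling_ms);
}

int main(void)
{
        struct governor simple = { .name = "simple_ondemand", .interrupt_driven = 0 };
        struct governor actmon = { .name = "tegra_actmon", .interrupt_driven = 1 };

        monitor_start(&simple, 100);
        monitor_start(&actmon, 12);
        return 0;
}
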
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index a6ba75f4106d..0b65f89d74d5 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -11,11 +11,13 @@
#include <linux/devfreq.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/module.h>
-#include <linux/mod_devicetable.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>
#include <linux/reset.h>
+#include <linux/workqueue.h>
#include "governor.h"
@@ -33,6 +35,8 @@
#define ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN BIT(30)
#define ACTMON_DEV_CTRL_ENB BIT(31)
+#define ACTMON_DEV_CTRL_STOP 0x00000000
+
#define ACTMON_DEV_UPPER_WMARK 0x4
#define ACTMON_DEV_LOWER_WMARK 0x8
#define ACTMON_DEV_INIT_AVG 0xc
@@ -68,6 +72,8 @@
#define KHZ 1000
+#define KHZ_MAX (ULONG_MAX / KHZ)
+
/* Assume that the bus is saturated if the utilization is 25% */
#define BUS_SATURATION_RATIO 25
@@ -90,9 +96,10 @@ struct tegra_devfreq_device_config {
unsigned int boost_down_threshold;
/*
- * Threshold of activity (cycles) below which the CPU frequency isn't
- * to be taken into account. This is to avoid increasing the EMC
- * frequency when the CPU is very busy but not accessing the bus often.
+ * Threshold of activity (cycles translated to kHz) below which the
+ * CPU frequency isn't to be taken into account. This is to avoid
+ * increasing the EMC frequency when the CPU is very busy but not
+ * accessing the bus often.
*/
u32 avg_dependency_threshold;
};
@@ -102,7 +109,7 @@ enum tegra_actmon_device {
MCCPU,
};
-static struct tegra_devfreq_device_config actmon_device_configs[] = {
+static const struct tegra_devfreq_device_config actmon_device_configs[] = {
{
/* MCALL: All memory accesses (including from the CPUs) */
.offset = 0x1c0,
@@ -117,10 +124,10 @@ static struct tegra_devfreq_device_config actmon_device_configs[] = {
.offset = 0x200,
.irq_mask = 1 << 25,
.boost_up_coeff = 800,
- .boost_down_coeff = 90,
+ .boost_down_coeff = 40,
.boost_up_threshold = 27,
.boost_down_threshold = 10,
- .avg_dependency_threshold = 50000,
+ .avg_dependency_threshold = 16000, /* 16MHz in kHz units */
},
};
@@ -156,11 +163,16 @@ struct tegra_devfreq {
struct clk *emc_clock;
unsigned long max_freq;
unsigned long cur_freq;
- struct notifier_block rate_change_nb;
+ struct notifier_block clk_rate_change_nb;
+
+ struct delayed_work cpufreq_update_work;
+ struct notifier_block cpu_rate_change_nb;
struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)];
- int irq;
+ unsigned int irq;
+
+ bool started;
};
struct tegra_actmon_emc_ratio {
@@ -168,8 +180,8 @@ struct tegra_actmon_emc_ratio {
unsigned long emc_freq;
};
-static struct tegra_actmon_emc_ratio actmon_emc_ratios[] = {
- { 1400000, ULONG_MAX },
+static const struct tegra_actmon_emc_ratio actmon_emc_ratios[] = {
+ { 1400000, KHZ_MAX },
{ 1200000, 750000 },
{ 1100000, 600000 },
{ 1000000, 500000 },
@@ -199,18 +211,26 @@ static void device_writel(struct tegra_devfreq_device *dev, u32 val,
writel_relaxed(val, dev->regs + offset);
}
-static unsigned long do_percent(unsigned long val, unsigned int pct)
+static unsigned long do_percent(unsigned long long val, unsigned int pct)
{
- return val * pct / 100;
+ val = val * pct;
+ do_div(val, 100);
+
+ /*
+ * High freq + high boosting percent + large polling interval are
+ * resulting in integer overflow when watermarks are calculated.
+ */
+ return min_t(u64, val, U32_MAX);
}
static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
struct tegra_devfreq_device *dev)
{
- u32 avg = dev->avg_count;
u32 avg_band_freq = tegra->max_freq * ACTMON_DEFAULT_AVG_BAND / KHZ;
- u32 band = avg_band_freq * ACTMON_SAMPLING_PERIOD;
+ u32 band = avg_band_freq * tegra->devfreq->profile->polling_ms;
+ u32 avg;
+ avg = min(dev->avg_count, U32_MAX - band);
device_writel(dev, avg + band, ACTMON_DEV_AVG_UPPER_WMARK);
avg = max(dev->avg_count, band);
@@ -220,7 +240,7 @@ static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
struct tegra_devfreq_device *dev)
{
- u32 val = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
+ u32 val = tegra->cur_freq * tegra->devfreq->profile->polling_ms;
device_writel(dev, do_percent(val, dev->config->boost_up_threshold),
ACTMON_DEV_UPPER_WMARK);
@@ -229,12 +249,6 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
ACTMON_DEV_LOWER_WMARK);
}
-static void actmon_write_barrier(struct tegra_devfreq *tegra)
-{
- /* ensure the update has reached the ACTMON */
- readl(tegra->regs + ACTMON_GLB_STATUS);
-}
-
static void actmon_isr_device(struct tegra_devfreq *tegra,
struct tegra_devfreq_device *dev)
{
@@ -256,10 +270,10 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
- if (dev->boost_freq >= tegra->max_freq)
+ if (dev->boost_freq >= tegra->max_freq) {
+ dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
dev->boost_freq = tegra->max_freq;
- else
- dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
+ }
} else if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_LOWER) {
/*
* new_boost = old_boost * down_coef
@@ -270,31 +284,22 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
- if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1))
- dev->boost_freq = 0;
- else
- dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
- }
-
- if (dev->config->avg_dependency_threshold) {
- if (dev->avg_count >= dev->config->avg_dependency_threshold)
- dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
- else if (dev->boost_freq == 0)
+ if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1)) {
dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
+ dev->boost_freq = 0;
+ }
}
device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
-
- actmon_write_barrier(tegra);
}
static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
unsigned long cpu_freq)
{
unsigned int i;
- struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
+ const struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
if (cpu_freq >= ratio->cpu_freq) {
@@ -308,25 +313,37 @@ static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
return 0;
}
+static unsigned long actmon_device_target_freq(struct tegra_devfreq *tegra,
+ struct tegra_devfreq_device *dev)
+{
+ unsigned int avg_sustain_coef;
+ unsigned long target_freq;
+
+ target_freq = dev->avg_count / tegra->devfreq->profile->polling_ms;
+ avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
+ target_freq = do_percent(target_freq, avg_sustain_coef);
+
+ return target_freq;
+}
+
static void actmon_update_target(struct tegra_devfreq *tegra,
struct tegra_devfreq_device *dev)
{
unsigned long cpu_freq = 0;
unsigned long static_cpu_emc_freq = 0;
- unsigned int avg_sustain_coef;
- if (dev->config->avg_dependency_threshold) {
- cpu_freq = cpufreq_get(0);
- static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
- }
+ dev->target_freq = actmon_device_target_freq(tegra, dev);
- dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD;
- avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
- dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef);
- dev->target_freq += dev->boost_freq;
+ if (dev->config->avg_dependency_threshold &&
+ dev->config->avg_dependency_threshold <= dev->target_freq) {
+ cpu_freq = cpufreq_quick_get(0);
+ static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
- if (dev->avg_count >= dev->config->avg_dependency_threshold)
+ dev->target_freq += dev->boost_freq;
dev->target_freq = max(dev->target_freq, static_cpu_emc_freq);
+ } else {
+ dev->target_freq += dev->boost_freq;
+ }
}
static irqreturn_t actmon_thread_isr(int irq, void *data)
@@ -354,8 +371,8 @@ static irqreturn_t actmon_thread_isr(int irq, void *data)
return handled ? IRQ_HANDLED : IRQ_NONE;
}
-static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
- unsigned long action, void *ptr)
+static int tegra_actmon_clk_notify_cb(struct notifier_block *nb,
+ unsigned long action, void *ptr)
{
struct clk_notifier_data *data = ptr;
struct tegra_devfreq *tegra;
@@ -365,7 +382,7 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
if (action != POST_RATE_CHANGE)
return NOTIFY_OK;
- tegra = container_of(nb, struct tegra_devfreq, rate_change_nb);
+ tegra = container_of(nb, struct tegra_devfreq, clk_rate_change_nb);
tegra->cur_freq = data->new_rate / KHZ;
@@ -375,7 +392,79 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
tegra_devfreq_update_wmark(tegra, dev);
}
- actmon_write_barrier(tegra);
+ return NOTIFY_OK;
+}
+
+static void tegra_actmon_delayed_update(struct work_struct *work)
+{
+ struct tegra_devfreq *tegra = container_of(work, struct tegra_devfreq,
+ cpufreq_update_work.work);
+
+ mutex_lock(&tegra->devfreq->lock);
+ update_devfreq(tegra->devfreq);
+ mutex_unlock(&tegra->devfreq->lock);
+}
+
+static unsigned long
+tegra_actmon_cpufreq_contribution(struct tegra_devfreq *tegra,
+ unsigned int cpu_freq)
+{
+ struct tegra_devfreq_device *actmon_dev = &tegra->devices[MCCPU];
+ unsigned long static_cpu_emc_freq, dev_freq;
+
+ dev_freq = actmon_device_target_freq(tegra, actmon_dev);
+
+ /* check whether CPU's freq is taken into account at all */
+ if (dev_freq < actmon_dev->config->avg_dependency_threshold)
+ return 0;
+
+ static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
+
+ if (dev_freq >= static_cpu_emc_freq)
+ return 0;
+
+ return static_cpu_emc_freq;
+}
+
+static int tegra_actmon_cpu_notify_cb(struct notifier_block *nb,
+ unsigned long action, void *ptr)
+{
+ struct cpufreq_freqs *freqs = ptr;
+ struct tegra_devfreq *tegra;
+ unsigned long old, new, delay;
+
+ if (action != CPUFREQ_POSTCHANGE)
+ return NOTIFY_OK;
+
+ tegra = container_of(nb, struct tegra_devfreq, cpu_rate_change_nb);
+
+ /*
+ * Quickly check whether CPU frequency should be taken into account
+ * at all, without blocking CPUFreq's core.
+ */
+ if (mutex_trylock(&tegra->devfreq->lock)) {
+ old = tegra_actmon_cpufreq_contribution(tegra, freqs->old);
+ new = tegra_actmon_cpufreq_contribution(tegra, freqs->new);
+ mutex_unlock(&tegra->devfreq->lock);
+
+ /*
+ * If CPU's frequency shouldn't be taken into account at
+ * the moment, then there is no need to update the devfreq's
+ * state because ISR will re-check CPU's frequency on the
+ * next interrupt.
+ */
+ if (old == new)
+ return NOTIFY_OK;
+ }
+
+ /*
+ * CPUFreq driver should support CPUFREQ_ASYNC_NOTIFICATION in order
+ * to allow asynchronous notifications. This means we can't block
+ * here for too long, otherwise CPUFreq's core will complain with a
+ * warning splat.
+ */
+ delay = msecs_to_jiffies(ACTMON_SAMPLING_PERIOD);
+ schedule_delayed_work(&tegra->cpufreq_update_work, delay);
return NOTIFY_OK;
}
@@ -385,9 +474,12 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
{
u32 val = 0;
+ /* reset boosting on governor's restart */
+ dev->boost_freq = 0;
+
dev->target_freq = tegra->cur_freq;
- dev->avg_count = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
+ dev->avg_count = tegra->cur_freq * tegra->devfreq->profile->polling_ms;
device_writel(dev, dev->avg_count, ACTMON_DEV_INIT_AVG);
tegra_devfreq_update_avg_wmark(tegra, dev);
@@ -405,45 +497,116 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
<< ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_NUM_SHIFT;
val |= ACTMON_DEV_CTRL_AVG_ABOVE_WMARK_EN;
val |= ACTMON_DEV_CTRL_AVG_BELOW_WMARK_EN;
- val |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
val |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
val |= ACTMON_DEV_CTRL_ENB;
device_writel(dev, val, ACTMON_DEV_CTRL);
}
-static void tegra_actmon_start(struct tegra_devfreq *tegra)
+static void tegra_actmon_stop_devices(struct tegra_devfreq *tegra)
{
+ struct tegra_devfreq_device *dev = tegra->devices;
unsigned int i;
- disable_irq(tegra->irq);
+ for (i = 0; i < ARRAY_SIZE(tegra->devices); i++, dev++) {
+ device_writel(dev, ACTMON_DEV_CTRL_STOP, ACTMON_DEV_CTRL);
+ device_writel(dev, ACTMON_INTR_STATUS_CLEAR,
+ ACTMON_DEV_INTR_STATUS);
+ }
+}
- actmon_writel(tegra, ACTMON_SAMPLING_PERIOD - 1,
+static int tegra_actmon_resume(struct tegra_devfreq *tegra)
+{
+ unsigned int i;
+ int err;
+
+ if (!tegra->devfreq->profile->polling_ms || !tegra->started)
+ return 0;
+
+ actmon_writel(tegra, tegra->devfreq->profile->polling_ms - 1,
ACTMON_GLB_PERIOD_CTRL);
+ /*
+ * CLK notifications are needed in order to reconfigure the upper
+ * consecutive watermark in accordance with the actual clock rate
+ * to avoid unnecessary upper interrupts.
+ */
+ err = clk_notifier_register(tegra->emc_clock,
+ &tegra->clk_rate_change_nb);
+ if (err) {
+ dev_err(tegra->devfreq->dev.parent,
+ "Failed to register rate change notifier\n");
+ return err;
+ }
+
+ tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
+
for (i = 0; i < ARRAY_SIZE(tegra->devices); i++)
tegra_actmon_configure_device(tegra, &tegra->devices[i]);
- actmon_write_barrier(tegra);
+ /*
+ * We are estimating CPU's memory bandwidth requirement based on
+ * amount of memory accesses and system's load, judging by CPU's
+ * frequency. We also don't want to receive events about CPU's
+ * frequency transitions when the governor is stopped, hence notifier
+ * is registered dynamically.
+ */
+ err = cpufreq_register_notifier(&tegra->cpu_rate_change_nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (err) {
+ dev_err(tegra->devfreq->dev.parent,
+ "Failed to register rate change notifier: %d\n", err);
+ goto err_stop;
+ }
enable_irq(tegra->irq);
+
+ return 0;
+
+err_stop:
+ tegra_actmon_stop_devices(tegra);
+
+ clk_notifier_unregister(tegra->emc_clock, &tegra->clk_rate_change_nb);
+
+ return err;
}
-static void tegra_actmon_stop(struct tegra_devfreq *tegra)
+static int tegra_actmon_start(struct tegra_devfreq *tegra)
{
- unsigned int i;
+ int ret = 0;
- disable_irq(tegra->irq);
+ if (!tegra->started) {
+ tegra->started = true;
- for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
- device_writel(&tegra->devices[i], 0x00000000, ACTMON_DEV_CTRL);
- device_writel(&tegra->devices[i], ACTMON_INTR_STATUS_CLEAR,
- ACTMON_DEV_INTR_STATUS);
+ ret = tegra_actmon_resume(tegra);
+ if (ret)
+ tegra->started = false;
}
- actmon_write_barrier(tegra);
+ return ret;
+}
- enable_irq(tegra->irq);
+static void tegra_actmon_pause(struct tegra_devfreq *tegra)
+{
+ if (!tegra->devfreq->profile->polling_ms || !tegra->started)
+ return;
+
+ disable_irq(tegra->irq);
+
+ cpufreq_unregister_notifier(&tegra->cpu_rate_change_nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+
+ cancel_delayed_work_sync(&tegra->cpufreq_update_work);
+
+ tegra_actmon_stop_devices(tegra);
+
+ clk_notifier_unregister(tegra->emc_clock, &tegra->clk_rate_change_nb);
+}
+
+static void tegra_actmon_stop(struct tegra_devfreq *tegra)
+{
+ tegra_actmon_pause(tegra);
+ tegra->started = false;
}
static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
@@ -463,7 +626,7 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
rate = dev_pm_opp_get_freq(opp);
dev_pm_opp_put(opp);
- err = clk_set_min_rate(tegra->emc_clock, rate);
+ err = clk_set_min_rate(tegra->emc_clock, rate * KHZ);
if (err)
return err;
@@ -492,7 +655,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
stat->private_data = tegra;
/* The below are to be used by the other governors */
- stat->current_frequency = cur_freq * KHZ;
+ stat->current_frequency = cur_freq;
actmon_dev = &tegra->devices[MCALL];
@@ -503,7 +666,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
stat->busy_time *= 100 / BUS_SATURATION_RATIO;
/* Number of cycles in a sampling period */
- stat->total_time = ACTMON_SAMPLING_PERIOD * cur_freq;
+ stat->total_time = tegra->devfreq->profile->polling_ms * cur_freq;
stat->busy_time = min(stat->busy_time, stat->total_time);
@@ -511,7 +674,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
}
static struct devfreq_dev_profile tegra_devfreq_profile = {
- .polling_ms = 0,
+ .polling_ms = ACTMON_SAMPLING_PERIOD,
.target = tegra_devfreq_target,
.get_dev_status = tegra_devfreq_get_dev_status,
};
@@ -542,7 +705,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
target_freq = max(target_freq, dev->target_freq);
}
- *freq = target_freq * KHZ;
+ *freq = target_freq;
return 0;
}
@@ -551,11 +714,19 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
unsigned int event, void *data)
{
struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent);
+ unsigned int *new_delay = data;
+ int ret = 0;
+
+ /*
+ * Couple devfreq-device with the governor early because it is
+ * needed at the moment of governor's start (used by ISR).
+ */
+ tegra->devfreq = devfreq;
switch (event) {
case DEVFREQ_GOV_START:
devfreq_monitor_start(devfreq);
- tegra_actmon_start(tegra);
+ ret = tegra_actmon_start(tegra);
break;
case DEVFREQ_GOV_STOP:
@@ -563,6 +734,21 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
devfreq_monitor_stop(devfreq);
break;
+ case DEVFREQ_GOV_INTERVAL:
+ /*
+ * ACTMON hardware supports up to 256 milliseconds for the
+ * sampling period.
+ */
+ if (*new_delay > 256) {
+ ret = -EINVAL;
+ break;
+ }
+
+ tegra_actmon_pause(tegra);
+ devfreq_interval_update(devfreq, new_delay);
+ ret = tegra_actmon_resume(tegra);
+ break;
+
case DEVFREQ_GOV_SUSPEND:
tegra_actmon_stop(tegra);
devfreq_monitor_suspend(devfreq);
@@ -570,11 +756,11 @@ static int tegra_governor_event_handler(struct devfreq *devfreq,
case DEVFREQ_GOV_RESUME:
devfreq_monitor_resume(devfreq);
- tegra_actmon_start(tegra);
+ ret = tegra_actmon_start(tegra);
break;
}
- return 0;
+ return ret;
}
static struct devfreq_governor tegra_devfreq_governor = {
@@ -582,14 +768,16 @@ static struct devfreq_governor tegra_devfreq_governor = {
.get_target_freq = tegra_governor_get_target,
.event_handler = tegra_governor_event_handler,
.immutable = true,
+ .interrupt_driven = true,
};
static int tegra_devfreq_probe(struct platform_device *pdev)
{
- struct tegra_devfreq *tegra;
struct tegra_devfreq_device *dev;
+ struct tegra_devfreq *tegra;
+ struct devfreq *devfreq;
unsigned int i;
- unsigned long rate;
+ long rate;
int err;
tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
@@ -618,12 +806,22 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
return PTR_ERR(tegra->emc_clock);
}
- tegra->irq = platform_get_irq(pdev, 0);
- if (tegra->irq < 0) {
- err = tegra->irq;
+ err = platform_get_irq(pdev, 0);
+ if (err < 0) {
dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err);
return err;
}
+ tegra->irq = err;
+
+ irq_set_status_flags(tegra->irq, IRQ_NOAUTOEN);
+
+ err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL,
+ actmon_thread_isr, IRQF_ONESHOT,
+ "tegra-devfreq", tegra);
+ if (err) {
+ dev_err(&pdev->dev, "Interrupt request failed: %d\n", err);
+ return err;
+ }
reset_control_assert(tegra->reset);
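Requesting the interrupt this early, but with IRQ_NOAUTOEN set, means the threaded handler is registered (and devm-managed) before the governor can ever start, while the line stays masked until the monitor is actually programmed. A sketch of that split, assuming the matching enable_irq()/disable_irq() calls live in tegra_actmon_start()/tegra_actmon_stop(), which are outside this hunk:

    #include <linux/interrupt.h>
    #include <linux/irq.h>

    /* probe: register the handler but leave the line disabled */
    irq_set_status_flags(tegra->irq, IRQ_NOAUTOEN);
    err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL,
                                    actmon_thread_isr, IRQF_ONESHOT,
                                    "tegra-devfreq", tegra);
    if (err)
            return err;

    /* later, once sampling is configured (presumed, not shown in this hunk) */
    enable_irq(tegra->irq);         /* in tegra_actmon_start() */
    disable_irq(tegra->irq);        /* in tegra_actmon_stop()  */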
@@ -636,8 +834,13 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
reset_control_deassert(tegra->reset);
- tegra->max_freq = clk_round_rate(tegra->emc_clock, ULONG_MAX) / KHZ;
- tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
+ rate = clk_round_rate(tegra->emc_clock, ULONG_MAX);
+ if (rate < 0) {
+ dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate);
+ return rate;
+ }
+
+ tegra->max_freq = rate / KHZ;
for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) {
dev = tegra->devices + i;
@@ -648,7 +851,14 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
for (rate = 0; rate <= tegra->max_freq * KHZ; rate++) {
rate = clk_round_rate(tegra->emc_clock, rate);
- err = dev_pm_opp_add(&pdev->dev, rate, 0);
+ if (rate < 0) {
+ dev_err(&pdev->dev,
+ "Failed to round clock rate: %ld\n", rate);
+ err = rate;
+ goto remove_opps;
+ }
+
+ err = dev_pm_opp_add(&pdev->dev, rate / KHZ, 0);
if (err) {
dev_err(&pdev->dev, "Failed to add OPP: %d\n", err);
goto remove_opps;
@@ -657,49 +867,33 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tegra);
- tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb;
- err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb);
- if (err) {
- dev_err(&pdev->dev,
- "Failed to register rate change notifier\n");
- goto remove_opps;
- }
+ tegra->clk_rate_change_nb.notifier_call = tegra_actmon_clk_notify_cb;
+ tegra->cpu_rate_change_nb.notifier_call = tegra_actmon_cpu_notify_cb;
+
+ INIT_DELAYED_WORK(&tegra->cpufreq_update_work,
+ tegra_actmon_delayed_update);
err = devfreq_add_governor(&tegra_devfreq_governor);
if (err) {
dev_err(&pdev->dev, "Failed to add governor: %d\n", err);
- goto unreg_notifier;
+ goto remove_opps;
}
tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock);
- tegra->devfreq = devfreq_add_device(&pdev->dev,
- &tegra_devfreq_profile,
- "tegra_actmon",
- NULL);
- if (IS_ERR(tegra->devfreq)) {
- err = PTR_ERR(tegra->devfreq);
- goto remove_governor;
- }
+ tegra_devfreq_profile.initial_freq /= KHZ;
- err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL,
- actmon_thread_isr, IRQF_ONESHOT,
- "tegra-devfreq", tegra);
- if (err) {
- dev_err(&pdev->dev, "Interrupt request failed: %d\n", err);
- goto remove_devfreq;
+ devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile,
+ "tegra_actmon", NULL);
+ if (IS_ERR(devfreq)) {
+ err = PTR_ERR(devfreq);
+ goto remove_governor;
}
return 0;
-remove_devfreq:
- devfreq_remove_device(tegra->devfreq);
-
remove_governor:
devfreq_remove_governor(&tegra_devfreq_governor);
-unreg_notifier:
- clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb);
-
remove_opps:
dev_pm_opp_remove_all_dynamic(&pdev->dev);
@@ -716,7 +910,6 @@ static int tegra_devfreq_remove(struct platform_device *pdev)
devfreq_remove_device(tegra->devfreq);
devfreq_remove_governor(&tegra_devfreq_governor);
- clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb);
dev_pm_opp_remove_all_dynamic(&pdev->dev);
reset_control_reset(tegra->reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4d71537a960d..a46090071034 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -950,21 +950,7 @@ static void psp_print_fw_hdr(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
struct amdgpu_device *adev = psp->adev;
- const struct sdma_firmware_header_v1_0 *sdma_hdr =
- (const struct sdma_firmware_header_v1_0 *)
- adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
- const struct gfx_firmware_header_v1_0 *ce_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- const struct gfx_firmware_header_v1_0 *pfp_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- const struct gfx_firmware_header_v1_0 *me_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- const struct gfx_firmware_header_v1_0 *mec_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- const struct rlc_firmware_header_v2_0 *rlc_hdr =
- (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- const struct smc_firmware_header_v1_0 *smc_hdr =
- (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
+ struct common_firmware_header *hdr;
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA0:
@@ -975,25 +961,33 @@ static void psp_print_fw_hdr(struct psp_context *psp,
case AMDGPU_UCODE_ID_SDMA5:
case AMDGPU_UCODE_ID_SDMA6:
case AMDGPU_UCODE_ID_SDMA7:
- amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header);
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_CE:
- amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_PFP:
- amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_ME:
- amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
- amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_RLC_G:
- amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
break;
case AMDGPU_UCODE_ID_SMC:
- amdgpu_ucode_print_smc_hdr(&smc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
break;
default:
break;
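The rewrite only casts fw->data inside the case that actually needs it, so the function no longer dereferences firmware pointers for blobs that were never loaded for the ucode being printed. It leans on the fact that every one of these firmware images starts with a struct common_firmware_header (the removed code passed &xxx_hdr->header, that same embedded member, to the print helpers). A small standalone illustration of the "common first member" pattern the new code relies on; the struct names here are made up, not the amdgpu definitions:

    #include <stdio.h>

    struct common_header {
            unsigned int size_bytes;
            unsigned int version;
    };

    /* Specific headers embed the common one as their first member... */
    struct gfx_header {
            struct common_header common;
            unsigned int ucode_offset;
    };

    /* ...so the raw blob can always be read through the common view. */
    static void print_hdr(const void *blob)
    {
            const struct common_header *hdr = blob;

            printf("v%u, %u bytes\n", hdr->version, hdr->size_bytes);
    }

    int main(void)
    {
            struct gfx_header gfx = { .common = { 256, 2 }, .ucode_offset = 64 };

            print_hdr(&gfx);
            return 0;
    }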
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 12099760d99e..c002f234ff31 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -4896,6 +4896,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
power_domains->initializing = true;
+ /* Must happen before power domain init on VLV/CHV */
+ intel_update_rawclk(i915);
+
if (INTEL_GEN(i915) >= 11) {
icl_display_core_init(i915, resume);
} else if (IS_CANNONLAKE(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1cdfe05514c3..e41fd94ae5a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -319,6 +319,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
free_engines(rcu_access_pointer(ctx->engines));
mutex_destroy(&ctx->engines_mutex);
+ kfree(ctx->jump_whitelist);
+
if (ctx->timeline)
intel_timeline_put(ctx->timeline);
@@ -441,6 +443,9 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+ ctx->jump_whitelist = NULL;
+ ctx->jump_whitelist_cmds = 0;
+
return ctx;
err_free:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..00537b9d7006 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -192,6 +192,13 @@ struct i915_gem_context {
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+ * Guarded by struct_mutex
+ */
+ unsigned long *jump_whitelist;
+ /** jump_whitelist_cmds: No of cmd slots available */
+ u32 jump_whitelist_cmds;
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b5f6937369ea..e635e1e5f4d3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -296,7 +296,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ return intel_engine_requires_cmd_parser(eb->engine) ||
+ (intel_engine_using_cmd_parser(eb->engine) &&
+ eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@@ -1955,40 +1957,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = eb->i915;
+ struct i915_vma * const vma = *eb->vma;
+ struct i915_address_space *vm;
+ u64 flags;
+
+ /*
+ * PPGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (CMDPARSER_USES_GGTT(dev_priv)) {
+ flags = PIN_GLOBAL;
+ vm = &dev_priv->ggtt.vm;
+ } else if (vma->vm->has_read_only) {
+ flags = PIN_USER;
+ vm = vma->vm;
+ i915_gem_object_set_readonly(obj);
+ } else {
+ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
struct i915_vma *vma;
+ u64 batch_start;
+ u64 shadow_batch_start;
int err;
pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
if (IS_ERR(pool))
return ERR_CAST(pool);
- err = intel_engine_cmd_parser(eb->engine,
+ vma = shadow_batch_pin(eb, pool->obj);
+ if (IS_ERR(vma))
+ goto err;
+
+ batch_start = gen8_canonical_addr(eb->batch->node.start) +
+ eb->batch_start_offset;
+
+ shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+ err = intel_engine_cmd_parser(eb->gem_context,
+ eb->engine,
eb->batch->obj,
- pool->obj,
+ batch_start,
eb->batch_start_offset,
eb->batch_len,
- is_master);
+ pool->obj,
+ shadow_batch_start);
+
if (err) {
- if (err == -EACCES) /* unhandled chained batch */
+ i915_vma_unpin(vma);
+
+ /*
+ * Unsafe GGTT-backed buffers can still be submitted safely
+ * as non-secure.
+ * For PPGTT backing however, we have no choice but to forcibly
+ * reject unsafe buffers
+ */
+ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+ /* Execute original buffer non-secure */
vma = NULL;
else
vma = ERR_PTR(err);
goto err;
}
- vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- goto err;
-
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
+ eb->batch_start_offset = 0;
+ eb->batch = vma;
+
+ if (CMDPARSER_USES_GGTT(eb->i915))
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+
+ /* eb->batch_len unchanged */
+
vma->private = pool;
return vma;
@@ -2421,6 +2477,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
@@ -2432,7 +2489,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
- eb.i915 = to_i915(dev);
+ eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2452,8 +2509,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
+ if (INTEL_GEN(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@@ -2530,34 +2594,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (eb.batch_len == 0)
+ eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
if (eb_use_cmdparser(&eb)) {
struct i915_vma *vma;
- vma = eb_parse(&eb, drm_is_current_master(file));
+ vma = eb_parse(&eb);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_vma;
}
-
- if (vma) {
- /*
- * Batch parsed and accepted:
- *
- * Set the DISPATCH_SECURE bit to remove the NON_SECURE
- * bit from MI_BATCH_BUFFER_START commands issued in
- * the dispatch_execbuffer implementations. We
- * specifically don't want that set on batches the
- * command parser has accepted.
- */
- eb.batch_flags |= I915_DISPATCH_SECURE;
- eb.batch_start_offset = 0;
- eb.batch = vma;
- }
}
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->size - eb.batch_start_offset;
-
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a82cea95c2f2..9dd8c299cb2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -475,12 +475,13 @@ struct intel_engine_cs {
struct intel_engine_hangcheck hangcheck;
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -541,9 +542,15 @@ struct intel_engine_cs {
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
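The old NEEDS flag is split in two here: USING marks an engine whose batches are scanned at all, while REQUIRES marks one (the Gen9 blitter, per the cmd parser hunks below) whose batches must never bypass the scanner. eb_use_cmdparser() in the execbuffer hunk above combines them: a REQUIRES engine is always parsed, a merely USING engine is parsed only when userspace supplied a batch length. A small standalone model of that decision, with the flag values taken from this hunk:

    #include <stdio.h>

    #define I915_ENGINE_USING_CMD_PARSER    (1u << 0)
    #define I915_ENGINE_REQUIRES_CMD_PARSER (1u << 7)

    /* Mirrors eb_use_cmdparser() from the execbuffer change above. */
    static int use_cmdparser(unsigned int engine_flags, unsigned int args_batch_len)
    {
            return (engine_flags & I915_ENGINE_REQUIRES_CMD_PARSER) ||
                   ((engine_flags & I915_ENGINE_USING_CMD_PARSER) && args_batch_len);
    }

    int main(void)
    {
            unsigned int gen9_bcs = I915_ENGINE_USING_CMD_PARSER |
                                    I915_ENGINE_REQUIRES_CMD_PARSER;
            unsigned int gen7_rcs = I915_ENGINE_USING_CMD_PARSER;

            printf("%d %d %d\n",
                   use_cmdparser(gen9_bcs, 0),     /* 1: always parsed */
                   use_cmdparser(gen7_rcs, 4096),  /* 1: opt-in parsing */
                   use_cmdparser(gen7_rcs, 0));    /* 0: falls back to hw checks */
            return 0;
    }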
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 1363e069ec83..fac75afed35b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -38,6 +38,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
@@ -67,6 +70,11 @@ static int __gt_park(struct intel_wakeref *wf)
if (INTEL_GEN(i915) >= 6)
gen6_rps_idle(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+ i915_rc6_ctx_wa_check(i915);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
+
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..cea184a7dde9 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -199,14 +199,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -270,7 +262,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_1_UC),
/* HW Special Case (Displayable) */
MOCS_ENTRY(61,
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+ LE_1_UC | LE_TC_1_LLC,
L3_3_WB),
};
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 13044c027f27..4bfaefdf548d 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -498,8 +498,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
goto out_free_gem;
}
- i915_gem_object_put(obj);
-
ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
if (ret < 0) {
gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
@@ -524,6 +522,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
file_count(dmabuf->file),
kref_read(&obj->base.refcount));
+ i915_gem_object_put(obj);
+
return dmabuf_fd;
out_free_dmabuf:
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 24555102e198..f24096e27bef 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -53,13 +53,11 @@
* granting userspace undue privileges. There are three categories of privilege.
*
* First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands.
*
* Second, commands which access registers. To support correct/enhanced
* userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access.
*
* Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
* The parser always rejects such commands.
@@ -84,9 +82,9 @@
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
*/
/*
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor {
* CMD_DESC_REJECT: The command is never allowed
* CMD_DESC_REGISTER: The command should be checked against the
* register whitelist for the appropriate ring
- * CMD_DESC_MASTER: The command is allowed if the submitting process
- * is the DRM master
*/
u32 flags;
#define CMD_DESC_FIXED (1<<0)
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor {
#define CMD_DESC_REJECT (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK (1<<4)
-#define CMD_DESC_MASTER (1<<5)
/*
* The command's unique identification bits and the bitmask to get them.
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table {
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), ~0u << (opm) }, \
+ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
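A plausible reading of the CMD() change: the stored match value is now masked with the same opcode mask used for lookups, so a definition that carries default length bits compares equal to the bare opcode the parser later tests against (the new BB_START handling below checks desc->cmd.value == MI_BATCH_BUFFER_START). Worked example; the constants are the usual i915 definitions quoted from memory, not part of this hunk (SMI is STD_MI_OPCODE_SHIFT, i.e. 23):

    #include <stdio.h>
    #include <stdint.h>

    #define STD_MI_OPCODE_SHIFT             23                      /* "SMI" */
    #define MI_BATCH_BUFFER_START           (0x31u << 23)
    #define MI_BATCH_BUFFER_START_GEN8      ((0x31u << 23) | 1)     /* carries a length bit */

    int main(void)
    {
            uint32_t mask = ~0u << STD_MI_OPCODE_SHIFT;              /* 0xff800000 */

            uint32_t old_value = MI_BATCH_BUFFER_START_GEN8;         /* keeps the |1   */
            uint32_t new_value = MI_BATCH_BUFFER_START_GEN8 & mask;  /* opcode only    */

            printf("old matches: %d\n", old_value == MI_BATCH_BUFFER_START);  /* 0 */
            printf("new matches: %d\n", new_value == MI_BATCH_BUFFER_START);  /* 1 */
            return 0;
    }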
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table {
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
/* Command Mask Fixed Len Action
---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, R ),
- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_PREDICATE, SMI, F, 1, S ),
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_RS_CONTEXT, SMI, F, 1, S ),
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ),
};
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD( MFX_WAIT, SMFX, F, 1, S ),
};
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
}}, ),
};
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B,
.bits = {{
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
};
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ * 1) Those that touch registers
+ * 2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions are standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+ CMD( MI_NOOP, SMI, F, 1, S ),
+ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ),
+ CMD( MI_FLUSH, SMI, F, 1, S ),
+ CMD( MI_ARB_CHECK, SMI, F, 1, S ),
+ CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
+ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ),
+ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ),
+ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ),
+ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
+ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ),
+ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
+
+ /*
+ * We allow BB_START but apply further checks. We just sanitize the
+ * basic fields here.
+ */
+#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0)
+#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
+ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B,
+ .bits = {{
+ .offset = 0,
+ .mask = MI_BB_START_OPERAND_MASK,
+ .expected = MI_BB_START_OPERAND_EXPECT,
+ }}, ),
+};
+
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
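Because almost every entry in this table is variable-length (the `!F` entries), the parser sizes each command from its own header: it masks the low bits with the table's length mask and adds LENGTH_BIAS (defined as 2 further down in this file) to get the total dword count, which is how it steps over `S`-flagged commands it does not otherwise inspect. A tiny demonstration of that arithmetic; assuming the usual MI encoding, MI_BATCH_BUFFER_START_GEN8 (length field of 1) comes out as 3 dwords, exactly what check_bbstart() below insists on:

    #include <stdio.h>
    #include <stdint.h>

    #define LENGTH_BIAS 2

    /* Variable-length MI commands encode "total dwords - 2" in their low bits. */
    static uint32_t cmd_dwords(uint32_t header, uint32_t length_mask)
    {
            return (header & length_mask) + LENGTH_BIAS;
    }

    int main(void)
    {
            uint32_t bb_start_gen8 = (0x31u << 23) | 1;     /* assumed encoding */

            printf("%u dwords\n", cmd_dwords(bb_start_gen8, 0xFF));  /* 3 */
            return 0;
    }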
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
#undef R
#undef W
#undef B
-#undef M
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
/*
* Register whitelists, sorted by increasing register offset.
*/
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+ REG32(BCS_SWCTRL),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ REG64_IDX(BCS_GPR, 0),
+ REG64_IDX(BCS_GPR, 1),
+ REG64_IDX(BCS_GPR, 2),
+ REG64_IDX(BCS_GPR, 3),
+ REG64_IDX(BCS_GPR, 4),
+ REG64_IDX(BCS_GPR, 5),
+ REG64_IDX(BCS_GPR, 6),
+ REG64_IDX(BCS_GPR, 7),
+ REG64_IDX(BCS_GPR, 8),
+ REG64_IDX(BCS_GPR, 9),
+ REG64_IDX(BCS_GPR, 10),
+ REG64_IDX(BCS_GPR, 11),
+ REG64_IDX(BCS_GPR, 12),
+ REG64_IDX(BCS_GPR, 13),
+ REG64_IDX(BCS_GPR, 14),
+ REG64_IDX(BCS_GPR, 15),
};
#undef REG64
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
struct drm_i915_reg_table {
const struct drm_i915_reg_descriptor *regs;
int num_regs;
- bool master;
};
static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};
static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};
static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+};
+
+static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
+ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+ u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+
+ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+ return 0xFF;
+
+ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ return 0;
+}
+
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
int cmd_table_count;
int ret;
- if (!IS_GEN(engine->i915, 7))
+ if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
+ engine->class == COPY_ENGINE_CLASS))
return;
switch (engine->class) {
case RENDER_CLASS:
if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_render_ring_cmds;
+ cmd_tables = hsw_render_ring_cmd_table;
cmd_table_count =
- ARRAY_SIZE(hsw_render_ring_cmds);
+ ARRAY_SIZE(hsw_render_ring_cmd_table);
} else {
- cmd_tables = gen7_render_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+ cmd_tables = gen7_render_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->reg_tables = ivb_render_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
}
-
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VIDEO_DECODE_CLASS:
- cmd_tables = gen7_video_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+ cmd_tables = gen7_video_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case COPY_ENGINE_CLASS:
- if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_blt_ring_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+ if (IS_GEN(engine->i915, 9)) {
+ cmd_tables = gen9_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+ engine->get_cmd_length_mask =
+ gen9_blt_get_cmd_length_mask;
+
+ /* BCS Engine unsafe without parser */
+ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
+ } else if (IS_HASWELL(engine->i915)) {
+ cmd_tables = hsw_blt_ring_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
} else {
- cmd_tables = gen7_blt_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+ cmd_tables = gen7_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
- if (IS_HASWELL(engine->i915)) {
+ if (IS_GEN(engine->i915, 9)) {
+ engine->reg_tables = gen9_blt_reg_tables;
+ engine->reg_table_count =
+ ARRAY_SIZE(gen9_blt_reg_tables);
+ } else if (IS_HASWELL(engine->i915)) {
engine->reg_tables = hsw_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
} else {
engine->reg_tables = ivb_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
}
-
- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VIDEO_ENHANCEMENT_CLASS:
- cmd_tables = hsw_vebox_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+ cmd_tables = hsw_vebox_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
return;
}
- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+ engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}
/**
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
- if (!intel_engine_needs_cmd_parser(engine))
+ if (!intel_engine_using_cmd_parser(engine))
return;
fini_hash_table(engine);
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, u32 addr)
{
const struct drm_i915_reg_table *table = engine->reg_tables;
+ const struct drm_i915_reg_descriptor *reg = NULL;
int count = engine->reg_table_count;
- for (; count > 0; ++table, --count) {
- if (!table->master || is_master) {
- const struct drm_i915_reg_descriptor *reg;
+ for (; !reg && (count > 0); ++table, --count)
+ reg = __find_reg(table->regs, table->num_regs, addr);
- reg = __find_reg(table->regs, table->num_regs, addr);
- if (reg != NULL)
- return reg;
- }
- }
-
- return NULL;
+ return reg;
}
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
- const u32 *cmd, u32 length,
- const bool is_master)
+ const u32 *cmd, u32 length)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
- *cmd);
- return false;
- }
-
if (desc->flags & CMD_DESC_REGISTER) {
/*
* Get the distance between individual register offset
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg(engine, is_master, reg_addr);
+ find_reg(engine, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
+static int check_bbstart(const struct i915_gem_context *ctx,
+ u32 *cmd, u32 offset, u32 length,
+ u32 batch_len,
+ u64 batch_start,
+ u64 shadow_batch_start)
+{
+ u64 jump_offset, jump_target;
+ u32 target_cmd_offset, target_cmd_index;
+
+ /* For igt compatibility on older platforms */
+ if (CMDPARSER_USES_GGTT(ctx->i915)) {
+ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+ return -EACCES;
+ }
+
+ if (length != 3) {
+ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+ length);
+ return -EINVAL;
+ }
+
+ jump_target = *(u64*)(cmd+1);
+ jump_offset = jump_target - batch_start;
+
+ /*
+ * Any underflow of jump_target is guaranteed to be outside the range
+ * of a u32, so >= test catches both too large and too small
+ */
+ if (jump_offset >= batch_len) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ /*
+ * This cannot overflow a u32 because we already checked jump_offset
+ * is within the BB, and the batch_len is a u32
+ */
+ target_cmd_offset = lower_32_bits(jump_offset);
+ target_cmd_index = target_cmd_offset / sizeof(u32);
+
+ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+ if (target_cmd_index == offset)
+ return 0;
+
+ if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+ return -EINVAL;
+ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
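The underflow comment above deserves unpacking: batch_start and jump_target are 64-bit canonical GPU addresses, so a jump that points below the batch makes the unsigned subtraction wrap to an enormous u64, which can never be smaller than a u32 batch_len; one `>=` test therefore rejects jumps in both directions. A standalone demonstration with made-up addresses:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
            uint64_t batch_start = 0x0000100000001000ull;   /* hypothetical */
            uint32_t batch_len   = 0x2000;                  /* 8 KiB batch  */

            /* Jump below the batch: the u64 subtraction wraps... */
            uint64_t jump_offset = (batch_start - 0x10) - batch_start;

            /* ...to something huge, so the single >= check rejects it. */
            printf("offset=0x%" PRIx64 " rejected=%d\n",
                   jump_offset, jump_offset >= batch_len);          /* rejected=1 */

            /* A jump inside the batch stays below batch_len and passes. */
            jump_offset = (batch_start + 0x40) - batch_start;
            printf("offset=0x%" PRIx64 " rejected=%d\n",
                   jump_offset, jump_offset >= batch_len);          /* rejected=0 */
            return 0;
    }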
+{
+ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+ const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+ unsigned long *next_whitelist;
+
+ if (CMDPARSER_USES_GGTT(ctx->i915))
+ return;
+
+ if (batch_cmds <= ctx->jump_whitelist_cmds) {
+ bitmap_zero(ctx->jump_whitelist, batch_cmds);
+ return;
+ }
+
+again:
+ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+ if (next_whitelist) {
+ kfree(ctx->jump_whitelist);
+ ctx->jump_whitelist = next_whitelist;
+ ctx->jump_whitelist_cmds =
+ next_size * BITS_PER_BYTE * sizeof(long);
+ return;
+ }
+
+ if (next_size > exact_size) {
+ next_size = exact_size;
+ goto again;
+ }
+
+ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+
+ return;
+}
+
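init_whitelist() sizes the per-context bitmap in whole longs: it first tries the next power of two of the batch's command count (so slightly larger batches reuse the same allocation) and falls back to the exact size if that kcalloc fails; jump_whitelist_cmds then records the resulting capacity in bits. A worked example of the arithmetic, assuming a 64-bit build (the roundup helper below is a plain loop, not the kernel's):

    #include <stdio.h>

    #define BITS_PER_LONG    64
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    static unsigned long roundup_pow_of_two(unsigned long n)
    {
            unsigned long r = 1;

            while (r < n)
                    r <<= 1;
            return r;
    }

    int main(void)
    {
            unsigned int batch_len  = 1200;                 /* bytes       */
            unsigned int batch_cmds = (batch_len + 3) / 4;  /* 300 dwords  */

            unsigned int exact = BITS_TO_LONGS(batch_cmds);                      /* 5 longs */
            unsigned int pref  = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));  /* 8 longs */

            printf("exact=%u longs, preferred=%u longs, capacity=%u cmds\n",
                   exact, pref, pref * BITS_PER_LONG);      /* 5, 8, 512 */
            return 0;
    }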
#define LENGTH_BIAS 2
/**
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
* @engine: the engine on which the batch is to execute
* @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @batch_start: Canonical base address of batch
* @batch_start_offset: byte offset in the batch at which execution starts
* @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+
+int intel_engine_cmd_parser(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master)
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start)
{
- u32 *cmd, *batch_end;
+ u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
+ init_whitelist(ctx, batch_len);
+
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
do {
u32 length;
- if (*cmd == MI_BATCH_BUFFER_END) {
- if (needs_clflush_after) {
- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
- drm_clflush_virt_range(ptr,
- (void *)(cmd + 1) - ptr);
- }
+ if (*cmd == MI_BATCH_BUFFER_END)
break;
- }
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
ret = -EINVAL;
- break;
- }
-
- /*
- * If the batch buffer contains a chained batch, return an
- * error that tells the caller to abort and dispatch the
- * workload as a non-secure batch.
- */
- if (desc->cmd.value == MI_BATCH_BUFFER_START) {
- ret = -EACCES;
- break;
+ goto err;
}
if (desc->flags & CMD_DESC_FIXED)
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
length,
batch_end - cmd);
ret = -EINVAL;
- break;
+ goto err;
}
- if (!check_cmd(engine, desc, cmd, length, is_master)) {
+ if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
+ goto err;
+ }
+
+ if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+ ret = check_bbstart(ctx, cmd, offset, length,
+ batch_len, batch_start,
+ shadow_batch_start);
+
+ if (ret)
+ goto err;
break;
}
+ if (ctx->jump_whitelist_cmds > offset)
+ set_bit(offset, ctx->jump_whitelist);
+
cmd += length;
+ offset += length;
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
- break;
+ goto err;
}
} while (1);
+ if (needs_clflush_after) {
+ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+
+ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
+ }
+
+err:
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_uabi_engine(engine, dev_priv) {
- if (intel_engine_needs_cmd_parser(engine)) {
+ if (intel_engine_using_cmd_parser(engine)) {
active = true;
break;
}
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
* the parser enabled.
* 9. Don't whitelist or handle oacontrol specially, as ownership
* for oacontrol state is moving to i915-perf.
+ * 10. Support for Gen9 BCS Parsing
*/
- return 9;
+ return 10;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bb6f86c7067a..3d717e282908 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -364,9 +364,6 @@ static int i915_driver_modeset_probe(struct drm_device *dev)
if (ret)
goto cleanup_vga_client;
- /* must happen before intel_power_domains_init_hw() on VLV/CHV */
- intel_update_rawclk(dev_priv);
-
intel_power_domains_init_hw(dev_priv, false);
intel_csr_ucode_init(dev_priv);
@@ -1850,6 +1847,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
i915_gem_suspend_late(dev_priv);
+ i915_rc6_ctx_wa_suspend(dev_priv);
+
intel_uncore_suspend(&dev_priv->uncore);
intel_power_domains_suspend(dev_priv,
@@ -2053,6 +2052,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
intel_power_domains_resume(dev_priv);
+ i915_rc6_ctx_wa_resume(dev_priv);
+
intel_gt_sanitize(&dev_priv->gt, true);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 953e1d12c23c..89b6112bd66b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -593,6 +593,8 @@ struct intel_rps {
struct intel_rc6 {
bool enabled;
+ bool ctx_corrupted;
+ intel_wakeref_t ctx_corrupted_wakeref;
u64 prev_hw_residency[4];
u64 cur_residency[4];
};
@@ -2075,9 +2077,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define VEBOX_MASK(dev_priv) \
ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS)
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
+ * All later gens can run the final buffer from the ppgtt
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7)
+
#define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc)
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
@@ -2110,10 +2119,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
+ (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
+
/* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
- (IS_CANNONLAKE(dev_priv) || \
- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
+ (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -2284,6 +2295,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
+struct i915_vma * __must_check
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags);
+
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
static inline int __must_check
@@ -2393,12 +2412,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct i915_gem_context *cxt,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 user_batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master);
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start);
/* intel_device_info.c */
static inline struct intel_device_info *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0f94f239919..98305d987ac1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -964,6 +964,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_address_space *vm = &dev_priv->ggtt.vm;
+
+ return i915_gem_object_pin(obj, vm, view, size, alignment,
+ flags | PIN_GLOBAL);
+}
+
+struct i915_vma *
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
@@ -1038,7 +1052,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin(vma, size, alignment, flags);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 5d9101376a3d..9f1517af5b7f 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -62,7 +62,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
break;
case I915_PARAM_HAS_SECURE_BATCHES:
- value = capable(CAP_SYS_ADMIN);
+ value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version(i915);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2abd199093c5..f8ee9aba3955 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -471,6 +471,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define ECOCHK_PPGTT_WT_HSW (0x2 << 3)
#define ECOCHK_PPGTT_WB_HSW (0x3 << 3)
+#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
+
#define GAC_ECO_BITS _MMIO(0x14090)
#define ECOBITS_SNB_BIT (1 << 13)
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
@@ -555,6 +557,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
*/
#define BCS_SWCTRL _MMIO(0x22200)
+/* There are 16 GPR registers */
+#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4)
+
#define GPGPU_THREADS_DISPATCHED _MMIO(0x2290)
#define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4)
#define HS_INVOCATION_COUNT _MMIO(0x2300)
@@ -7211,6 +7217,10 @@ enum {
#define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084)
#define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088)
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT _MMIO(0x42060)
+#define MMIO_TIMEOUT_US(us) ((us) << 0)
+
/* interrupts */
#define DE_MASTER_IRQ_CONTROL (1 << 31)
#define DE_SPRITEB_FLIP_DONE (1 << 29)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 75ee027abb80..2efe1d12d5a9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -126,6 +126,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
*/
I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+ /*
+ * Lower the display internal timeout.
+ * This is needed to avoid any hard hangs when DSI port PLL
+ * is off and an MMIO access is attempted by any privileged
+ * application, using batch buffers or any other means.
+ */
+ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -8544,6 +8552,100 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
}
+static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
+{
+ return !I915_READ(GEN8_RC6_CTX_INFO);
+}
+
+static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
+{
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (i915_rc6_ctx_corrupted(i915)) {
+ DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+ i915->gt_pm.rc6.ctx_corrupted = true;
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get(&i915->runtime_pm);
+ }
+}
+
+static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
+{
+ if (i915->gt_pm.rc6.ctx_corrupted) {
+ intel_runtime_pm_put(&i915->runtime_pm,
+ i915->gt_pm.rc6.ctx_corrupted_wakeref);
+ i915->gt_pm.rc6.ctx_corrupted = false;
+ }
+}
+
+/**
+ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
+ */
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
+{
+ if (i915->gt_pm.rc6.ctx_corrupted)
+ intel_runtime_pm_put(&i915->runtime_pm,
+ i915->gt_pm.rc6.ctx_corrupted_wakeref);
+}
+
+/**
+ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
+{
+ if (!i915->gt_pm.rc6.ctx_corrupted)
+ return;
+
+ if (i915_rc6_ctx_corrupted(i915)) {
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get(&i915->runtime_pm);
+ return;
+ }
+
+ DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+ i915->gt_pm.rc6.ctx_corrupted = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv);
+
+/**
+ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @i915: i915 device
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ *
+ * Return false if no context corruption has happened since the last call of
+ * this function, true otherwise.
+*/
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
+{
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return false;
+
+ if (i915->gt_pm.rc6.ctx_corrupted)
+ return false;
+
+ if (!i915_rc6_ctx_corrupted(i915))
+ return false;
+
+ DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+ intel_disable_rc6(i915);
+ i915->gt_pm.rc6.ctx_corrupted = true;
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get_noresume(&i915->runtime_pm);
+
+ return true;
+}
+
void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -8557,6 +8659,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
pm_runtime_get(&dev_priv->drm.pdev->dev);
}
+ i915_rc6_ctx_wa_init(dev_priv);
+
/* Initialize RPS limits (for userspace) */
if (IS_CHERRYVIEW(dev_priv))
cherryview_init_gt_powersave(dev_priv);
@@ -8595,6 +8699,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
if (IS_VALLEYVIEW(dev_priv))
valleyview_cleanup_gt_powersave(dev_priv);
+ i915_rc6_ctx_wa_cleanup(dev_priv);
+
if (!HAS_RC6(dev_priv))
pm_runtime_put(&dev_priv->drm.pdev->dev);
}
@@ -8623,7 +8729,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
i915->gt_pm.llc_pstate.enabled = false;
}
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
{
lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8642,6 +8748,15 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
dev_priv->gt_pm.rc6.enabled = false;
}
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ mutex_lock(&rps->lock);
+ __intel_disable_rc6(dev_priv);
+ mutex_unlock(&rps->lock);
+}
+
static void intel_disable_rps(struct drm_i915_private *dev_priv)
{
lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8667,7 +8782,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
{
mutex_lock(&dev_priv->gt_pm.rps.lock);
- intel_disable_rc6(dev_priv);
+ __intel_disable_rc6(dev_priv);
intel_disable_rps(dev_priv);
if (HAS_LLC(dev_priv))
intel_disable_llc_pstate(dev_priv);
@@ -8694,6 +8809,9 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv)
if (dev_priv->gt_pm.rc6.enabled)
return;
+ if (dev_priv->gt_pm.rc6.ctx_corrupted)
+ return;
+
if (IS_CHERRYVIEW(dev_priv))
cherryview_enable_rc6(dev_priv);
else if (IS_VALLEYVIEW(dev_priv))
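Read together with the intel_gt_pm.c and i915_drv.c hunks earlier in this patch, the new helpers hang off existing power-management entry points rather than introducing new ones. A condensed map of the call sites, all of which appear in this diff:

    /*
     * intel_init_gt_powersave()    -> i915_rc6_ctx_wa_init()     boot-time check
     * __gt_park()                  -> i915_rc6_ctx_wa_check()    re-check when the GT
     *                                                            idles (forcewake still
     *                                                            held from unpark)
     * i915_drm_suspend_late()      -> i915_rc6_ctx_wa_suspend()  drop the wakeref
     * i915_drm_resume_early()      -> i915_rc6_ctx_wa_resume()   re-check and re-arm
     * intel_enable_rc6()           -> returns early while ctx_corrupted is set
     * intel_cleanup_gt_powersave() -> i915_rc6_ctx_wa_cleanup()
     */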
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index e3573e1e16e3..0f7390c850ec 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -36,6 +36,9 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct i915_request *rq);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 04c721d0d3b9..b89439ed210d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -488,7 +488,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
WARN_ON(!tcon->quirks->has_channel_0);
- tcon->dclk_min_div = 6;
+ tcon->dclk_min_div = 1;
tcon->dclk_max_div = 127;
sun4i_tcon0_mode_set_common(tcon, mode);
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 9cb2aa1e20ef..62a1c92ab803 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -39,6 +39,7 @@ struct i2c_acpi_lookup {
int index;
u32 speed;
u32 min_speed;
+ u32 force_speed;
};
/**
@@ -285,6 +286,19 @@ i2c_acpi_match_device(const struct acpi_device_id *matches,
return acpi_match_device(matches, &client->dev);
}
+static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = {
+ /*
+ * These Silead touchscreen controllers only work at 400KHz, for
+ * some reason they do not work at 100KHz. On some devices the ACPI
+ * tables list another device at their bus as only being capable
+ * of 100KHz, testing has shown that these other devices work fine
+ * at 400KHz (as can be expected of any recent i2c hw) so we force
+ * the speed of the bus to 400 KHz if a Silead device is present.
+ */
+ { "MSSL1680", 0 },
+ {}
+};
+
static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
void *data, void **return_value)
{
@@ -303,6 +317,9 @@ static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
if (lookup->speed <= lookup->min_speed)
lookup->min_speed = lookup->speed;
+ if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0)
+ lookup->force_speed = 400000;
+
return AE_OK;
}
@@ -340,7 +357,16 @@ u32 i2c_acpi_find_bus_speed(struct device *dev)
return 0;
}
- return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
+ if (lookup.force_speed) {
+ if (lookup.force_speed != lookup.min_speed)
+ dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n",
+ lookup.min_speed, lookup.force_speed);
+ return lookup.force_speed;
+ } else if (lookup.min_speed != UINT_MAX) {
+ return lookup.min_speed;
+ } else {
+ return 0;
+ }
}
EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
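
The end of i2c_acpi_find_bus_speed() above resolves two inputs: the slowest speed any device on the bus advertises in the DSDT (min_speed, UINT_MAX if none was found) and the speed forced by a matched quirk entry (force_speed, 400 kHz here for Silead controllers). A small standalone sketch of that resolution order, with printf standing in for the dev_warn(FW_BUG ...) message and the helper name resolve_bus_speed() invented for the example:

#include <limits.h>
#include <stdio.h>

static unsigned int resolve_bus_speed(unsigned int min_speed,
				      unsigned int force_speed)
{
	if (force_speed) {
		if (force_speed != min_speed)
			printf("FW_BUG: DSDT asks for %u Hz, forcing %u Hz\n",
			       min_speed, force_speed);
		return force_speed;		/* quirk wins */
	}
	return min_speed != UINT_MAX ? min_speed : 0;
}

int main(void)
{
	/* DSDT lists a 100 kHz device, but a Silead controller forces 400 kHz */
	printf("%u\n", resolve_bus_speed(100000, 400000));
	/* no forcing device present: slowest advertised speed wins */
	printf("%u\n", resolve_bus_speed(100000, 0));
	/* nothing found on the bus at all */
	printf("%u\n", resolve_bus_speed(UINT_MAX, 0));
	return 0;
}
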
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index 6f632d543fcc..7eb41990bd6d 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -245,14 +245,14 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
}
client = of_i2c_register_device(adap, rd->dn);
- put_device(&adap->dev);
-
if (IS_ERR(client)) {
dev_err(&adap->dev, "failed to create client for '%pOF'\n",
rd->dn);
+ put_device(&adap->dev);
of_node_clear_flag(rd->dn, OF_POPULATED);
return notifier_from_errno(PTR_ERR(client));
}
+ put_device(&adap->dev);
break;
case OF_RECONFIG_CHANGE_REMOVE:
/* already depopulated? */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb9525c074..26b792bb1027 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
- hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61aa5504d7c3..61362bd6d3ce 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 513a8aac9ccd..1a3c647675a7 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
- if (qp->s_rnr_retry == 0 &&
- !((rdi->post_parms[wqe->wr.opcode].flags &
- RVT_OPERATION_IGN_RNR_CNT) &&
- qp->s_rnr_retry_cnt == 0)) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
- qp->s_rnr_retry--;
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index f21fca3617d5..e53f542b60af 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/
-static u32 tid_rdma_flow_wt;
-
static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
@@ -3005,10 +3023,7 @@ nak_psn:
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
- qpriv->r_tid_ack = qpriv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto unlock;
}
@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationally expensive exact estimate of when
* queue. Rather than a computationally expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of
- * segments between two sync points), assuming PMTU of 4K. The rationale
- * for this is that flows are released and recycled at each sync point.
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
*/
- tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
- TID_RDMA_MAX_SEGMENT_SIZE;
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
- to_seg = tid_rdma_flow_wt *
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
- * caused due a delay in scheduling the second leg which we
+ * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
break;
}
@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
+ validate_r_tid_ack(priv);
if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}
done:
- priv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
@@ -4388,10 +4399,7 @@ send_nak:
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
- priv->s_flags |= RVT_S_ACK_PENDING;
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto done;
}
@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c536185261e..6e82df2190b7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
/*
* Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
-void hfi1_compute_tid_rdma_flow_wt(void);
-
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 86783276fb1f..3bb8f78fb7b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
#define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
+ (sizeof(struct scatterlist) + sizeof(void *)))
#define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2)
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 9591457eb768..43ea2c13b212 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
srq->max_gs = srq_init_attr->attr.max_sge;
- srq_desc_size = max(16, 16 * srq->max_gs);
+ srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
srq->wqe_shift = ilog2(srq_desc_size);
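
The roundup_pow_of_two() added above matters because ilog2() rounds down: a descriptor built from a non-power-of-two size would otherwise get a wqe_shift describing a stride smaller than the descriptor itself. A standalone sketch of the arithmetic, with ilog2_u32() and roundup_pow_of_two_u32() as local stand-ins for the kernel helpers:

#include <stdio.h>

static unsigned int ilog2_u32(unsigned int v)		/* floor(log2(v)) */
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int roundup_pow_of_two_u32(unsigned int v)
{
	unsigned int r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_gs = 3;			/* 3 SGEs */
	unsigned int desc = 16 * max_gs;		/* 48-byte descriptor */

	printf("naive:   shift %u -> %u-byte stride (too small)\n",
	       ilog2_u32(desc), 1u << ilog2_u32(desc));

	desc = roundup_pow_of_two_u32(desc);
	printf("rounded: shift %u -> %u-byte stride\n",
	       ilog2_u32(desc), 1u << ilog2_u32(desc));
	return 0;
}
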
diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c
index 1cb40c7475af..8229a9006917 100644
--- a/drivers/input/ff-memless.c
+++ b/drivers/input/ff-memless.c
@@ -489,6 +489,15 @@ static void ml_ff_destroy(struct ff_device *ff)
{
struct ml_device *ml = ff->private;
+ /*
+ * Even though we stop all playing effects when tearing down
+ * an input device (via input_device_flush() that calls into
+ * input_ff_flush() that stops and erases all effects), we
+ * do not actually stop the timer, and therefore we should
+ * do it here.
+ */
+ del_timer_sync(&ml->timer);
+
kfree(ml->private);
}
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 56fae3472114..704558d449a2 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -177,6 +177,7 @@ static const char * const smbus_pnp_ids[] = {
"LEN0096", /* X280 */
"LEN0097", /* X280 -> ALPS trackpoint */
"LEN009b", /* T580 */
+ "LEN0402", /* X1 Extreme 2nd Generation */
"LEN200f", /* T450s */
"LEN2054", /* E480 */
"LEN2055", /* E580 */
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index f28a7158b2ef..bbf9ae9f3f0c 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -510,7 +510,6 @@ struct f11_data {
struct rmi_2d_sensor_platform_data sensor_pdata;
unsigned long *abs_mask;
unsigned long *rel_mask;
- unsigned long *result_bits;
};
enum f11_finger_state {
@@ -1057,7 +1056,7 @@ static int rmi_f11_initialize(struct rmi_function *fn)
/*
** init instance data, fill in values and create any sysfs files
*/
- f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 3,
+ f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 2,
GFP_KERNEL);
if (!f11)
return -ENOMEM;
@@ -1076,8 +1075,6 @@ static int rmi_f11_initialize(struct rmi_function *fn)
+ sizeof(struct f11_data));
f11->rel_mask = (unsigned long *)((char *)f11
+ sizeof(struct f11_data) + mask_size);
- f11->result_bits = (unsigned long *)((char *)f11
- + sizeof(struct f11_data) + mask_size * 2);
set_bit(fn->irq_pos, f11->abs_mask);
set_bit(fn->irq_pos + 1, f11->rel_mask);
@@ -1284,8 +1281,8 @@ static irqreturn_t rmi_f11_attention(int irq, void *ctx)
valid_bytes = f11->sensor.attn_size;
memcpy(f11->sensor.data_pkt, drvdata->attn_data.data,
valid_bytes);
- drvdata->attn_data.data += f11->sensor.attn_size;
- drvdata->attn_data.size -= f11->sensor.attn_size;
+ drvdata->attn_data.data += valid_bytes;
+ drvdata->attn_data.size -= valid_bytes;
} else {
error = rmi_read_block(rmi_dev,
data_base_addr, f11->sensor.data_pkt,
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index d20a5d6780d1..7e97944f7616 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -55,6 +55,9 @@ struct f12_data {
const struct rmi_register_desc_item *data15;
u16 data15_offset;
+
+ unsigned long *abs_mask;
+ unsigned long *rel_mask;
};
static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
@@ -209,8 +212,8 @@ static irqreturn_t rmi_f12_attention(int irq, void *ctx)
valid_bytes = sensor->attn_size;
memcpy(sensor->data_pkt, drvdata->attn_data.data,
valid_bytes);
- drvdata->attn_data.data += sensor->attn_size;
- drvdata->attn_data.size -= sensor->attn_size;
+ drvdata->attn_data.data += valid_bytes;
+ drvdata->attn_data.size -= valid_bytes;
} else {
retval = rmi_read_block(rmi_dev, f12->data_addr,
sensor->data_pkt, sensor->pkt_size);
@@ -291,9 +294,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn)
static int rmi_f12_config(struct rmi_function *fn)
{
struct rmi_driver *drv = fn->rmi_dev->driver;
+ struct f12_data *f12 = dev_get_drvdata(&fn->dev);
+ struct rmi_2d_sensor *sensor;
int ret;
- drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+ sensor = &f12->sensor;
+
+ if (!sensor->report_abs)
+ drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask);
+ else
+ drv->set_irq_bits(fn->rmi_dev, f12->abs_mask);
+
+ drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask);
ret = rmi_f12_write_control_regs(fn);
if (ret)
@@ -315,9 +327,12 @@ static int rmi_f12_probe(struct rmi_function *fn)
struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
u16 data_offset = 0;
+ int mask_size;
rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__);
+ mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long);
+
ret = rmi_read(fn->rmi_dev, query_addr, &buf);
if (ret < 0) {
dev_err(&fn->dev, "Failed to read general info register: %d\n",
@@ -332,10 +347,19 @@ static int rmi_f12_probe(struct rmi_function *fn)
return -ENODEV;
}
- f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL);
+ f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2,
+ GFP_KERNEL);
if (!f12)
return -ENOMEM;
+ f12->abs_mask = (unsigned long *)((char *)f12
+ + sizeof(struct f12_data));
+ f12->rel_mask = (unsigned long *)((char *)f12
+ + sizeof(struct f12_data) + mask_size);
+
+ set_bit(fn->irq_pos, f12->abs_mask);
+ set_bit(fn->irq_pos + 1, f12->rel_mask);
+
f12->has_dribble = !!(buf & BIT(3));
if (fn->dev.of_node) {
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index 710b02595486..897105b9a98b 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -359,7 +359,7 @@ static const struct vb2_ops rmi_f54_queue_ops = {
static const struct vb2_queue rmi_f54_queue = {
.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
.io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ,
- .buf_struct_size = sizeof(struct vb2_buffer),
+ .buf_struct_size = sizeof(struct vb2_v4l2_buffer),
.ops = &rmi_f54_queue_ops,
.mem_ops = &vb2_vmalloc_memops,
.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC,
@@ -601,7 +601,7 @@ static int rmi_f54_config(struct rmi_function *fn)
{
struct rmi_driver *drv = fn->rmi_dev->driver;
- drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+ drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask);
return 0;
}
@@ -730,6 +730,7 @@ static void rmi_f54_remove(struct rmi_function *fn)
video_unregister_device(&f54->vdev);
v4l2_device_unregister(&f54->v4l2);
+ destroy_workqueue(f54->workqueue);
}
struct rmi_function_handler rmi_f54_handler = {
diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c
index 4b22d49a0f49..6bcffc930384 100644
--- a/drivers/input/touchscreen/cyttsp4_core.c
+++ b/drivers/input/touchscreen/cyttsp4_core.c
@@ -1990,11 +1990,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
/* get sysinfo */
md->si = &cd->sysinfo;
- if (!md->si) {
- dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n",
- __func__, md->si);
- goto error_get_sysinfo;
- }
rc = cyttsp4_setup_input_device(cd);
if (rc)
@@ -2004,8 +1999,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
error_init_input:
input_free_device(md->input);
-error_get_sysinfo:
- input_set_drvdata(md->input, NULL);
error_alloc_failed:
dev_err(dev, "%s failed.\n", __func__);
return rc;
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index e7d1920729fb..0ae986c42bc8 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -358,7 +358,7 @@ static int sdhci_at91_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(&pdev->dev);
/* HS200 is broken at this moment */
- host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200;
+ host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
ret = sdhci_add_host(host);
if (ret)
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 2f0b092d6dcc..c5ba13fae399 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -163,7 +163,6 @@ struct tmio_mmc_host {
unsigned long last_req_ts;
struct mutex ios_lock; /* protect set_ios() context */
bool native_hotplug;
- bool runtime_synced;
bool sdio_irq_enabled;
/* Mandatory callback */
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 9b6e1001e77c..86b591100f16 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/pm_runtime.h>
#include <linux/regulator/consumer.h>
@@ -1248,10 +1249,12 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
/* See if we also get DMA */
tmio_mmc_request_dma(_host, pdata);
+ dev_pm_domain_start(&pdev->dev);
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_enable(&pdev->dev);
- pm_runtime_get_sync(&pdev->dev);
ret = mmc_add_host(mmc);
if (ret)
@@ -1333,11 +1336,6 @@ int tmio_mmc_host_runtime_resume(struct device *dev)
{
struct tmio_mmc_host *host = dev_get_drvdata(dev);
- if (!host->runtime_synced) {
- host->runtime_synced = true;
- return 0;
- }
-
tmio_mmc_clk_enable(host);
tmio_mmc_hw_reset(host->mmc);
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index bb6032211043..0a9f42e5fedf 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -617,6 +617,7 @@ err_free_chan:
sl->tty = NULL;
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
+ free_netdev(sl->dev);
err_exit:
rtnl_unlock();
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index 073cbd0bb91b..d838c174dc0d 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -273,6 +273,19 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip,
int pin;
int err;
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
if (pin < 0)
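
The two checks added above are the external-timestamp flag validation that several later hunks (tg3, igb, mlx5, dp83640, ravb) repeat: unknown flag bits are rejected outright, and a PTP_STRICT_FLAGS request for both edges is refused because this hardware cannot honour it (the igb hunk inverts the edge test, since that hardware can only timestamp both edges at once). A standalone sketch of the same checks; the flag bit values are illustrative stand-ins for the PTP_* UAPI constants, chosen only so the example runs on its own:

#include <stdbool.h>
#include <stdio.h>

#define PTP_ENABLE_FEATURE	(1u << 0)
#define PTP_RISING_EDGE		(1u << 1)
#define PTP_FALLING_EDGE	(1u << 2)
#define PTP_STRICT_FLAGS	(1u << 3)
#define PTP_EXTTS_EDGES		(PTP_RISING_EDGE | PTP_FALLING_EDGE)

/* true => return -EOPNOTSUPP, as in the hunk above */
static bool reject_extts_request(unsigned int flags)
{
	/* Unknown flag bits are never silently ignored. */
	if (flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE |
		      PTP_FALLING_EDGE | PTP_STRICT_FLAGS))
		return true;

	/* Strict requests for both edges exceed what this hardware can do. */
	if ((flags & PTP_STRICT_FLAGS) &&
	    (flags & PTP_ENABLE_FEATURE) &&
	    (flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
		return true;

	return false;
}

int main(void)
{
	printf("rising edge only: reject=%d\n",
	       reject_extts_request(PTP_ENABLE_FEATURE | PTP_RISING_EDGE |
				    PTP_STRICT_FLAGS));
	printf("both edges:       reject=%d\n",
	       reject_extts_request(PTP_ENABLE_FEATURE | PTP_EXTTS_EDGES |
				    PTP_STRICT_FLAGS));
	return 0;
}
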
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 77f3511b97de..ca3aa1250dd1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6280,6 +6280,10 @@ static int tg3_ptp_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (rq->perout.index != 0)
return -EINVAL;
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index f1a0c4dceda0..f37c9a08c4cf 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -763,6 +763,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
{
struct net_device *dev;
struct ep93xx_priv *ep;
+ struct resource *mem;
dev = platform_get_drvdata(pdev);
if (dev == NULL)
@@ -778,8 +779,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
iounmap(ep->base_addr);
if (ep->res != NULL) {
- release_resource(ep->res);
- kfree(ep->res);
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(mem->start, resource_size(mem));
}
free_netdev(dev);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index e736ce2c58ca..a8f4c69252ff 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2524,6 +2524,7 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev)
struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
gemini_port_remove(port);
+ free_netdev(port->netdev);
return 0;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 19379bae0144..bf5add954181 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2232,8 +2232,16 @@ err_set_cdan:
err_service_reg:
free_channel(priv, channel);
err_alloc_ch:
- if (err == -EPROBE_DEFER)
+ if (err == -EPROBE_DEFER) {
+ for (i = 0; i < priv->num_channels; i++) {
+ channel = priv->channel[i];
+ nctx = &channel->nctx;
+ dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+ free_channel(priv, channel);
+ }
+ priv->num_channels = 0;
return err;
+ }
if (cpumask_empty(&priv->dpio_cpumask)) {
dev_err(dev, "No cpu with an affine DPIO/DPCON\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 680c3508876d..52c9d204fe3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -70,11 +70,6 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
-struct hns3_link_mode_mapping {
- u32 hns3_link_mode;
- u32 ethtool_link_mode;
-};
-
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
struct hnae3_handle *h = hns3_get_handle(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index c063301d6060..a1790af73096 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -124,7 +124,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
if (ret)
return ret;
- for (i = 0; i < HNAE3_MAX_TC; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
switch (ets->tc_tsa[i]) {
case IEEE_8021QAZ_TSA_STRICT:
if (hdev->tm_info.tc_info[i].tc_sch_mode !=
@@ -318,6 +318,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
struct net_device *netdev = h->kinfo.netdev;
struct hclge_dev *hdev = vport->back;
u8 i, j, pfc_map, *prio_tc;
+ int ret;
if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
@@ -347,7 +348,21 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
hclge_tm_pfc_info_update(hdev);
- return hclge_pause_setup_hw(hdev, false);
+ ret = hclge_pause_setup_hw(hdev, false);
+ if (ret)
+ return ret;
+
+ ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ if (ret)
+ return ret;
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret) {
+ hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ return ret;
+ }
+
+ return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
/* DCBX configuration */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 16f7d0e15b4f..c052bb33b3d3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6263,11 +6263,23 @@ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0);
req = (struct hclge_mac_vlan_switch_cmd *)desc.data;
+
+ /* read current config parameter */
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM,
- false);
+ true);
req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL;
req->func_id = cpu_to_le32(func_id);
- req->switch_param = switch_param;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "read mac vlan switch parameter fail, ret = %d\n", ret);
+ return ret;
+ }
+
+ /* modify and write new config parameter */
+ hclge_cmd_reuse_desc(&desc, false);
+ req->switch_param = (req->switch_param & param_mask) | switch_param;
req->param_mask = param_mask;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index fd3071f55bd3..c39e921757ba 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -521,6 +521,19 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests failing to enable both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
if (on) {
pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
rq->extts.index);
@@ -551,6 +564,10 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (on) {
pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
rq->perout.index);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 206dc5dc1df8..5c1f389e3320 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 CGX driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
index fb3ba4968a9b..473d9751601f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 CGX driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index e332e82fc066..413c3f254cf8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 76a4575d18ff..75439fce0505 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 8d6d90fdfb73..5d4df315a0e1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index b2ce957605bb..da649f6a5573 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index c9d60b0554c0..5222e4228905 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 09a8d61f3144..1ea92a2e7cfe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index f920dac74e6c..84a39063a8bb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 69bb6bb06e76..d44ac666e730 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -4010,6 +4010,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_params_unregister;
devlink_params_publish(devlink);
+ devlink_reload_enable(devlink);
pci_save_state(pdev);
return 0;
@@ -4121,6 +4122,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(priv);
int active_vfs = 0;
+ devlink_reload_disable(devlink);
+
if (mlx4_is_slave(dev))
persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0059b290e095..43f97601b500 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -236,6 +236,19 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
if (rq->extts.index >= clock->ptp_info.n_pins)
return -EINVAL;
@@ -290,6 +303,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 4421ab22182f..0a0884d86d44 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1189,6 +1189,9 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (mlxsw_driver->params_register)
devlink_params_publish(devlink);
+ if (!reload)
+ devlink_reload_enable(devlink);
+
return 0;
err_thermal_init:
@@ -1249,6 +1252,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ if (!reload)
+ devlink_reload_disable(devlink);
if (devlink_is_reload_failed(devlink)) {
if (!reload)
/* Only the parts that were not de-initialized in the
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index 57b26c2acf87..e8fe9a90fe4f 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -429,6 +429,10 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on,
int pulse_width = 0;
int perout_bit = 0;
+ /* Reject requests with unsupported flags */
+ if (perout->flags)
+ return -EOPNOTSUPP;
+
if (!on) {
lan743x_ptp_perout_off(adapter);
return 0;
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index a9c89d5d8898..9f88b5db4f89 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -955,6 +955,8 @@ enum RAVB_QUEUE {
#define NUM_RX_QUEUE 2
#define NUM_TX_QUEUE 2
+#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16))
+
/* TX descriptors per packet */
#define NUM_TX_DESC_GEN2 2
#define NUM_TX_DESC_GEN3 1
@@ -1018,7 +1020,6 @@ struct ravb_private {
u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */
u32 cur_tx[NUM_TX_QUEUE];
u32 dirty_tx[NUM_TX_QUEUE];
- u32 rx_buf_sz; /* Based on MTU+slack. */
struct napi_struct napi[NUM_RX_QUEUE];
struct work_struct work;
/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index de9aa8c47f1c..3f165c137236 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -230,7 +230,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
}
ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -293,9 +293,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+ rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
@@ -342,9 +342,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
int ring_size;
int i;
- priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
- ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
-
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -354,7 +351,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
@@ -584,7 +581,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -617,11 +614,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+ desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
if (!priv->rx_skb[q][entry]) {
skb = netdev_alloc_skb(ndev,
- priv->rx_buf_sz +
+ RX_BUF_SZ +
RAVB_ALIGN - 1);
if (!skb)
break; /* Better luck next round. */
@@ -1801,10 +1798,15 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
{
- if (netif_running(ndev))
- return -EBUSY;
+ struct ravb_private *priv = netdev_priv(ndev);
ndev->mtu = new_mtu;
+
+ if (netif_running(ndev)) {
+ synchronize_irq(priv->emac_irq);
+ ravb_emac_init(ndev);
+ }
+
netdev_update_features(ndev);
return 0;
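
With the MTU-derived rx_buf_sz gone, every receive buffer now uses the fixed RX_BUF_SZ introduced in the ravb.h hunk above, which is why ravb_change_mtu() can reprogram the EMAC on a running interface instead of refusing with -EBUSY. A quick arithmetic check of that constant, assuming the usual ETH_FCS_LEN of 4 bytes and a 2-byte __sum16:

#include <stdint.h>
#include <stdio.h>

#define ETH_FCS_LEN	4
#define RX_BUF_SZ	(2048 - ETH_FCS_LEN + sizeof(uint16_t))	/* __sum16 */

int main(void)
{
	/* 2048 - 4 + 2 = 2046 bytes per RX buffer, independent of the MTU */
	printf("RX_BUF_SZ = %zu\n", RX_BUF_SZ);
	return 0;
}
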
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index 9a42580693cb..6984bd5b7da9 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -182,6 +182,13 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
struct net_device *ndev = priv->ndev;
unsigned long flags;
+ /* Reject requests with unsupported flags */
+ if (req->flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
if (req->index)
return -EINVAL;
@@ -211,6 +218,10 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
unsigned long flags;
int error = 0;
+ /* Reject requests with unsupported flags */
+ if (req->flags)
+ return -EOPNOTSUPP;
+
if (req->index)
return -EINVAL;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index ddcc191febdb..6e47be63a43c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1226,7 +1226,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
dwmac_mux:
sun8i_dwmac_unset_syscon(gmac);
dwmac_exit:
- sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+ stmmac_pltfr_remove(pdev);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 775db776b3cc..23fecf68f781 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
// Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
// stmmac Support for 5.xx Ethernet QoS cores
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 99037386080a..9d08a934fe4f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
/*
* Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
* stmmac XGMAC definitions.
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index ddb851d99618..9010d881b12e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
// stmmac HW Interface Callbacks
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index df638b18b72c..0989e2bb6ee3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -140,6 +140,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
cfg = &priv->pps[rq->perout.index];
cfg->start.tv_sec = rq->perout.start.sec;
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 54ca6681ba31..44c2d857a7fa 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -708,6 +708,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
goto err_debugfs_exit;
devlink_params_publish(devlink);
+ devlink_reload_enable(devlink);
return nsim_dev;
err_debugfs_exit:
@@ -732,6 +733,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
{
struct devlink *devlink = priv_to_devlink(nsim_dev);
+ devlink_reload_disable(devlink);
nsim_bpf_dev_exit(nsim_dev);
nsim_dev_debugfs_exit(nsim_dev);
nsim_dev_traps_exit(devlink);
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 6580094161a9..8f241b57fcf6 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -469,6 +469,19 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
index = rq->extts.index;
if (index >= N_EXT_TS)
return -EINVAL;
@@ -491,6 +504,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
if (rq->perout.index >= N_PER_OUT)
return -EINVAL;
return periodic_output(clock, rq, on, rq->perout.index);
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2e29ab841b4d..35876562e32a 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -64,11 +64,12 @@ static int mdiobus_register_reset(struct mdio_device *mdiodev)
if (mdiodev->dev.of_node)
reset = devm_reset_control_get_exclusive(&mdiodev->dev,
"phy");
- if (PTR_ERR(reset) == -ENOENT ||
- PTR_ERR(reset) == -ENOTSUPP)
- reset = NULL;
- else if (IS_ERR(reset))
- return PTR_ERR(reset);
+ if (IS_ERR(reset)) {
+ if (PTR_ERR(reset) == -ENOENT || PTR_ERR(reset) == -ENOSYS)
+ reset = NULL;
+ else
+ return PTR_ERR(reset);
+ }
mdiodev->reset_ctrl = reset;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index cac64b96d545..4d479e3c817d 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -855,6 +855,7 @@ err_free_chan:
sl->tty = NULL;
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
+ free_netdev(sl->dev);
err_exit:
rtnl_unlock();
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 011bd4cb546e..af3994e0853b 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -196,7 +196,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
/* Get the MAC address */
ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
- if (ret < 0) {
+ if (ret < ETH_ALEN) {
netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
goto free;
}
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a245597a3902..c2c82e6391b4 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -579,7 +579,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
- if (err < sizeof(max_datagram_size)) {
+ if (err != sizeof(max_datagram_size)) {
dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
goto out;
}
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 56d334b9ad45..4196c0e32740 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1371,6 +1371,8 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
{QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
+ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
+ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 8894027429d6..d80f71f82a6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -251,27 +251,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
struct ieee80211_hdr *hdr = (void *)skb->data;
unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
unsigned int mss = skb_shinfo(skb)->gso_size;
- u16 length, iv_len, amsdu_pad;
+ u16 length, amsdu_pad;
u8 *start_hdr;
struct iwl_tso_hdr_page *hdr_page;
struct page **page_ptr;
struct tso_t tso;
- /* if the packet is protected, then it must be CCMP or GCMP */
- iv_len = ieee80211_has_protected(hdr->frame_control) ?
- IEEE80211_CCMP_HDR_LEN : 0;
-
trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
&dev_cmd->hdr, start_len, 0);
ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
- total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
+ total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
amsdu_pad = 0;
/* total amount of header we may need for this A-MSDU */
hdr_room = DIV_ROUND_UP(total_len, mss) *
- (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
+ (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
/* Our device supports 9 segments at most, it will fit in 1 page */
hdr_page = get_page_hdr(trans, hdr_room);
@@ -282,14 +278,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
start_hdr = hdr_page->pos;
page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
*page_ptr = hdr_page->page;
- memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
- hdr_page->pos += iv_len;
/*
- * Pull the ieee80211 header + IV to be able to use TSO core,
+ * Pull the ieee80211 header to be able to use TSO core,
* we will restore it for the tx_status flow.
*/
- skb_pull(skb, hdr_len + iv_len);
+ skb_pull(skb, hdr_len);
/*
* Remove the length of all the headers that we don't actually
@@ -364,8 +358,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
}
}
- /* re -add the WiFi header and IV */
- skb_push(skb, hdr_len + iv_len);
+ /* re -add the WiFi header */
+ skb_push(skb, hdr_len);
return 0;
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 307bd2afbe05..4d1909aecd6c 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -220,8 +220,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
if (r == -EREMOTEIO) {
phy->hard_fault = r;
- skb = NULL;
- } else if (r < 0) {
+ if (info->mode == NXP_NCI_MODE_FW)
+ nxp_nci_fw_recv_frame(phy->ndev, NULL);
+ }
+ if (r < 0) {
nfc_err(&client->dev, "Read failed with error %d\n", r);
goto exit_irq_handled;
}
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 9ff0538ee83a..be7a7d332332 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -2103,6 +2103,75 @@ put_table:
}
/**
+ * dev_pm_opp_adjust_voltage() - helper to change the voltage of an OPP
+ * @dev: device for which we do this operation
+ * @freq: OPP frequency to adjust voltage of
+ * @u_volt: new OPP target voltage
+ * @u_volt_min: new OPP min voltage
+ * @u_volt_max: new OPP max voltage
+ *
+ * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
+ * copy operation, returns 0 if no modification was done OR modification was
+ * successful.
+ */
+int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
+ unsigned long u_volt, unsigned long u_volt_min,
+ unsigned long u_volt_max)
+
+{
+ struct opp_table *opp_table;
+ struct dev_pm_opp *tmp_opp, *opp = ERR_PTR(-ENODEV);
+ int r = 0;
+
+ /* Find the opp_table */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ r = PTR_ERR(opp_table);
+ dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
+ return r;
+ }
+
+ mutex_lock(&opp_table->lock);
+
+ /* Do we have the frequency? */
+ list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
+ if (tmp_opp->rate == freq) {
+ opp = tmp_opp;
+ break;
+ }
+ }
+
+ if (IS_ERR(opp)) {
+ r = PTR_ERR(opp);
+ goto adjust_unlock;
+ }
+
+ /* Is update really needed? */
+ if (opp->supplies->u_volt == u_volt)
+ goto adjust_unlock;
+
+ opp->supplies->u_volt = u_volt;
+ opp->supplies->u_volt_min = u_volt_min;
+ opp->supplies->u_volt_max = u_volt_max;
+
+ dev_pm_opp_get(opp);
+ mutex_unlock(&opp_table->lock);
+
+ /* Notify the voltage change of the OPP */
+ blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADJUST_VOLTAGE,
+ opp);
+
+ dev_pm_opp_put(opp);
+ goto adjust_put_table;
+
+adjust_unlock:
+ mutex_unlock(&opp_table->lock);
+adjust_put_table:
+ dev_pm_opp_put_opp_table(opp_table);
+ return r;
+}
+
+/**
* dev_pm_opp_enable() - Enable a specific OPP
* @dev: device for which we do this operation
* @freq: OPP frequency to enable
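
The new dev_pm_opp_adjust_voltage() above looks up the OPP matching a given frequency, rewrites its supply voltages under the table lock, and fires OPP_EVENT_ADJUST_VOLTAGE so notifier listeners can react. A hedged usage sketch from a hypothetical driver: the device, frequency and voltage numbers are invented, and it assumes the matching declaration is made available through <linux/pm_opp.h>.

#include <linux/device.h>
#include <linux/pm_opp.h>

/* Hypothetical helper: trim the 800 MHz OPP to a calibrated 950 mV. */
static int example_apply_opp_calibration(struct device *dev)
{
	int ret;

	ret = dev_pm_opp_adjust_voltage(dev, 800000000,
					950000,		/* target: 950 mV */
					900000,		/* min:    900 mV */
					1000000);	/* max:   1000 mV */
	if (ret)
		dev_warn(dev, "OPP voltage adjust failed: %d\n", ret);

	return ret;
}
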
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 94ddd7d659c8..a67701ed93e8 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -978,6 +978,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
INTEL_CPU_FAM6(ICELAKE_NNPI, rapl_defaults_core),
INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server),
INTEL_CPU_FAM6(ICELAKE_D, rapl_defaults_hsw_server),
+ INTEL_CPU_FAM6(COMETLAKE_L, rapl_defaults_core),
+ INTEL_CPU_FAM6(COMETLAKE, rapl_defaults_core),
INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt),
INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht),
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 67d0199840fd..9d72ab593f13 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -149,11 +149,21 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EFAULT;
break;
}
- if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
- req.extts.rsv[0] || req.extts.rsv[1]) &&
- cmd == PTP_EXTTS_REQUEST2) {
- err = -EINVAL;
- break;
+ if (cmd == PTP_EXTTS_REQUEST2) {
+ /* Tell the drivers to check the flags carefully. */
+ req.extts.flags |= PTP_STRICT_FLAGS;
+ /* Make sure no reserved bit is set. */
+ if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
+ req.extts.rsv[0] || req.extts.rsv[1]) {
+ err = -EINVAL;
+ break;
+ }
+ /* Ensure one of the rising/falling edge bits is set. */
+ if ((req.extts.flags & PTP_ENABLE_FEATURE) &&
+ (req.extts.flags & PTP_EXTTS_EDGES) == 0) {
+ err = -EINVAL;
+ break;
+ }
} else if (cmd == PTP_EXTTS_REQUEST) {
req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
req.extts.rsv[0] = 0;
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 6afad68e5ba2..238240984bc1 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -76,9 +76,11 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
* ensures no active vp_list traversal while the vport is removed
* from the queue)
*/
- for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++)
- wait_event_timeout(vha->vref_waitq,
- atomic_read(&vha->vref_count), HZ);
+ for (i = 0; i < 10; i++) {
+ if (wait_event_timeout(vha->vref_waitq,
+ !atomic_read(&vha->vref_count), HZ) > 0)
+ break;
+ }
spin_lock_irqsave(&ha->vport_slock, flags);
if (atomic_read(&vha->vref_count)) {
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 337162ac3a77..726ad4cbf4a6 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1119,9 +1119,11 @@ qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha)
qla2x00_mark_all_devices_lost(vha, 0);
- for (i = 0; i < 10; i++)
- wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha),
- HZ);
+ for (i = 0; i < 10; i++) {
+ if (wait_event_timeout(vha->fcport_waitQ,
+ test_fcport_count(vha), HZ) > 0)
+ break;
+ }
flush_workqueue(vha->hw->wq);
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5447738906ac..91c007d26c1e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1883,7 +1883,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
{
unsigned int cmd_size, sgl_size;
- sgl_size = scsi_mq_inline_sgl_size(shost);
+ sgl_size = max_t(unsigned int, sizeof(struct scatterlist),
+ scsi_mq_inline_sgl_size(shost));
cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
if (scsi_host_get_prot(shost))
cmd_size += sizeof(struct scsi_data_buffer) +
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index de4019dc0f0b..1efc69e194f8 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -263,25 +263,16 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
int result = cmd->result;
struct request *rq = cmd->request;
- switch (req_op(rq)) {
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_RESET_ALL:
-
- if (result &&
- sshdr->sense_key == ILLEGAL_REQUEST &&
- sshdr->asc == 0x24)
- /*
- * INVALID FIELD IN CDB error: reset of a conventional
- * zone was attempted. Nothing to worry about, so be
- * quiet about the error.
- */
- rq->rq_flags |= RQF_QUIET;
- break;
-
- case REQ_OP_WRITE:
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE_SAME:
- break;
+ if (req_op(rq) == REQ_OP_ZONE_RESET &&
+ result &&
+ sshdr->sense_key == ILLEGAL_REQUEST &&
+ sshdr->asc == 0x24) {
+ /*
+ * INVALID FIELD IN CDB error: reset of a conventional
+ * zone was attempted. Nothing to worry about, so be
+ * quiet about the error.
+ */
+ rq->rq_flags |= RQF_QUIET;
}
}
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 927d29eb92c6..6f1fa4c849a1 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -125,6 +125,4 @@ source "drivers/staging/exfat/Kconfig"
source "drivers/staging/qlge/Kconfig"
-source "drivers/staging/vboxsf/Kconfig"
-
endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index f01f04199073..a90f9b308c8d 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -53,4 +53,3 @@ obj-$(CONFIG_UWB) += uwb/
obj-$(CONFIG_USB_WUSB) += wusbcore/
obj-$(CONFIG_EXFAT_FS) += exfat/
obj-$(CONFIG_QLGE) += qlge/
-obj-$(CONFIG_VBOXSF_FS) += vboxsf/
diff --git a/drivers/staging/vboxsf/Kconfig b/drivers/staging/vboxsf/Kconfig
deleted file mode 100644
index b84586ae08b3..000000000000
--- a/drivers/staging/vboxsf/Kconfig
+++ /dev/null
@@ -1,10 +0,0 @@
-config VBOXSF_FS
- tristate "VirtualBox guest shared folder (vboxsf) support"
- depends on X86 && VBOXGUEST
- select NLS
- help
- VirtualBox hosts can share folders with guests, this driver
- implements the Linux-guest side of this allowing folders exported
- by the host to be mounted under Linux.
-
- If you want to use shared folders in VirtualBox guests, answer Y or M.
diff --git a/drivers/staging/vboxsf/Makefile b/drivers/staging/vboxsf/Makefile
deleted file mode 100644
index 9e4328e79623..000000000000
--- a/drivers/staging/vboxsf/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# SPDX-License-Identifier: MIT
-
-obj-$(CONFIG_VBOXSF_FS) += vboxsf.o
-
-vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o
diff --git a/drivers/staging/vboxsf/TODO b/drivers/staging/vboxsf/TODO
deleted file mode 100644
index 8b9193d0d4f0..000000000000
--- a/drivers/staging/vboxsf/TODO
+++ /dev/null
@@ -1,7 +0,0 @@
-TODO:
-- Find a file-system developer to review this and give their Reviewed-By
-- Address any items coming up during review
-- Move to fs/vboxfs
-
-Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-and Hans de Goede <hdegoede@redhat.com>
diff --git a/drivers/staging/vboxsf/dir.c b/drivers/staging/vboxsf/dir.c
deleted file mode 100644
index f260b5cc1646..000000000000
--- a/drivers/staging/vboxsf/dir.c
+++ /dev/null
@@ -1,418 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * VirtualBox Guest Shared Folders support: Directory inode and file operations
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#include <linux/namei.h>
-#include <linux/vbox_utils.h>
-#include "vfsmod.h"
-
-static int vboxsf_dir_open(struct inode *inode, struct file *file)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
- struct shfl_createparms params = {};
- struct vboxsf_dir_info *sf_d;
- int err;
-
- sf_d = vboxsf_dir_info_alloc();
- if (!sf_d)
- return -ENOMEM;
-
- params.handle = SHFL_HANDLE_NIL;
- params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS |
- SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ;
-
- err = vboxsf_create_at_dentry(file_dentry(file), &params);
- if (err)
- goto err_free_dir_info;
-
- if (params.result != SHFL_FILE_EXISTS) {
- err = -ENOENT;
- goto err_close;
- }
-
- err = vboxsf_dir_read_all(sbi, sf_d, params.handle);
- if (err)
- goto err_close;
-
- vboxsf_close(sbi->root, params.handle);
- file->private_data = sf_d;
- return 0;
-
-err_close:
- vboxsf_close(sbi->root, params.handle);
-err_free_dir_info:
- vboxsf_dir_info_free(sf_d);
- return err;
-}
-
-static int vboxsf_dir_release(struct inode *inode, struct file *file)
-{
- if (file->private_data)
- vboxsf_dir_info_free(file->private_data);
-
- return 0;
-}
-
-static unsigned int vboxsf_get_d_type(u32 mode)
-{
- unsigned int d_type;
-
- switch (mode & SHFL_TYPE_MASK) {
- case SHFL_TYPE_FIFO:
- d_type = DT_FIFO;
- break;
- case SHFL_TYPE_DEV_CHAR:
- d_type = DT_CHR;
- break;
- case SHFL_TYPE_DIRECTORY:
- d_type = DT_DIR;
- break;
- case SHFL_TYPE_DEV_BLOCK:
- d_type = DT_BLK;
- break;
- case SHFL_TYPE_FILE:
- d_type = DT_REG;
- break;
- case SHFL_TYPE_SYMLINK:
- d_type = DT_LNK;
- break;
- case SHFL_TYPE_SOCKET:
- d_type = DT_SOCK;
- break;
- case SHFL_TYPE_WHITEOUT:
- d_type = DT_WHT;
- break;
- default:
- d_type = DT_UNKNOWN;
- break;
- }
- return d_type;
-}
-
-static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb);
- struct vboxsf_dir_info *sf_d = dir->private_data;
- struct shfl_dirinfo *info;
- struct vboxsf_dir_buf *b;
- unsigned int d_type;
- loff_t i, cur = 0;
- ino_t fake_ino;
- size_t size;
- int err;
-
- list_for_each_entry(b, &sf_d->info_list, head) {
-try_next_entry:
- if (ctx->pos >= cur + b->entries) {
- cur += b->entries;
- continue;
- }
-
- /*
- * Note the vboxsf_dir_info objects we are iterating over here
- * are variable sized, so the info pointer may end up being
- * unaligned. This is how we get the data from the host.
- * Since vboxsf is only supported on x86 machines this is not
- * a problem.
- */
- for (i = 0, info = b->buf; i < ctx->pos - cur; i++) {
- size = offsetof(struct shfl_dirinfo, name.string) +
- info->name.size;
- info = (struct shfl_dirinfo *)((uintptr_t)info + size);
- }
-
- /* Info now points to the right entry, emit it. */
- d_type = vboxsf_get_d_type(info->info.attr.mode);
-
- /*
- * On 32 bit systems pos is 64 bit signed, while ino is 32 bit
- * unsigned, so fake_ino may overflow; check for this.
- */
- if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) {
- vbg_err("vboxsf: fake ino overflow, truncating dir\n");
- return false;
- }
- fake_ino = ctx->pos + 1;
-
- if (sbi->nls) {
- char d_name[NAME_MAX];
-
- err = vboxsf_nlscpy(sbi, d_name, NAME_MAX,
- info->name.string.utf8,
- info->name.length);
- if (err) {
- /* skip erroneous entry and proceed */
- ctx->pos += 1;
- goto try_next_entry;
- }
-
- return dir_emit(ctx, d_name, strlen(d_name),
- fake_ino, d_type);
- }
-
- return dir_emit(ctx, info->name.string.utf8, info->name.length,
- fake_ino, d_type);
- }
-
- return false;
-}
-
-static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
-{
- bool keep_iterating;
-
- for (keep_iterating = true; keep_iterating; ctx->pos += 1)
- keep_iterating = vboxsf_dir_emit(dir, ctx);
-
- return 0;
-}
-
-const struct file_operations vboxsf_dir_fops = {
- .open = vboxsf_dir_open,
- .iterate = vboxsf_dir_iterate,
- .release = vboxsf_dir_release,
- .read = generic_read_dir,
- .llseek = generic_file_llseek,
-};
-
-/*
- * This is called during name resolution/lookup to check if the @dentry in
- * the cache is still valid. The job is handled by vboxsf_inode_revalidate.
- */
-static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags)
-{
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- if (d_really_is_positive(dentry))
- return vboxsf_inode_revalidate(dentry) == 0;
- else
- return vboxsf_stat_dentry(dentry, NULL) == -ENOENT;
-}
-
-const struct dentry_operations vboxsf_dentry_ops = {
- .d_revalidate = vboxsf_dentry_revalidate
-};
-
-/* iops */
-
-static struct dentry *vboxsf_dir_lookup(struct inode *parent,
- struct dentry *dentry,
- unsigned int flags)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
- struct shfl_fsobjinfo fsinfo;
- struct inode *inode;
- int err;
-
- dentry->d_time = jiffies;
-
- err = vboxsf_stat_dentry(dentry, &fsinfo);
- if (err) {
- inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
- } else {
- inode = vboxsf_new_inode(parent->i_sb);
- if (!IS_ERR(inode))
- vboxsf_init_inode(sbi, inode, &fsinfo);
- }
-
- return d_splice_alias(inode, dentry);
-}
-
-static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry,
- struct shfl_fsobjinfo *info)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
- struct vboxsf_inode *sf_i;
- struct inode *inode;
-
- inode = vboxsf_new_inode(parent->i_sb);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- sf_i = VBOXSF_I(inode);
- /* The host may have given us different attr than requested */
- sf_i->force_restat = 1;
- vboxsf_init_inode(sbi, inode, info);
-
- d_instantiate(dentry, inode);
-
- return 0;
-}
-
-static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry,
- umode_t mode, int is_dir)
-{
- struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
- struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
- struct shfl_createparms params = {};
- int err;
-
- params.handle = SHFL_HANDLE_NIL;
- params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW |
- SHFL_CF_ACT_FAIL_IF_EXISTS |
- SHFL_CF_ACCESS_READWRITE |
- (is_dir ? SHFL_CF_DIRECTORY : 0);
- params.info.attr.mode = (mode & 0777) |
- (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE);
- params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING;
-
- err = vboxsf_create_at_dentry(dentry, &params);
- if (err)
- return err;
-
- if (params.result != SHFL_FILE_CREATED)
- return -EPERM;
-
- vboxsf_close(sbi->root, params.handle);
-
- err = vboxsf_dir_instantiate(parent, dentry, &params.info);
- if (err)
- return err;
-
- /* parent directory access/change time changed */
- sf_parent_i->force_restat = 1;
-
- return 0;
-}
-
-static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry,
- umode_t mode, bool excl)
-{
- return vboxsf_dir_create(parent, dentry, mode, 0);
-}
-
-static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry,
- umode_t mode)
-{
- return vboxsf_dir_create(parent, dentry, mode, 1);
-}
-
-static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
- struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
- struct inode *inode = d_inode(dentry);
- struct shfl_string *path;
- u32 flags;
- int err;
-
- if (S_ISDIR(inode->i_mode))
- flags = SHFL_REMOVE_DIR;
- else
- flags = SHFL_REMOVE_FILE;
-
- if (S_ISLNK(inode->i_mode))
- flags |= SHFL_REMOVE_SYMLINK;
-
- path = vboxsf_path_from_dentry(sbi, dentry);
- if (IS_ERR(path))
- return PTR_ERR(path);
-
- err = vboxsf_remove(sbi->root, path, flags);
- __putname(path);
- if (err)
- return err;
-
- /* parent directory access/change time changed */
- sf_parent_i->force_restat = 1;
-
- return 0;
-}
-
-static int vboxsf_dir_rename(struct inode *old_parent,
- struct dentry *old_dentry,
- struct inode *new_parent,
- struct dentry *new_dentry,
- unsigned int flags)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb);
- struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent);
- struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent);
- u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS;
- struct shfl_string *old_path, *new_path;
- int err;
-
- if (flags)
- return -EINVAL;
-
- old_path = vboxsf_path_from_dentry(sbi, old_dentry);
- if (IS_ERR(old_path))
- return PTR_ERR(old_path);
-
- new_path = vboxsf_path_from_dentry(sbi, new_dentry);
- if (IS_ERR(new_path)) {
- err = PTR_ERR(new_path);
- goto err_put_old_path;
- }
-
- if (d_inode(old_dentry)->i_mode & S_IFDIR)
- shfl_flags = 0;
-
- err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags);
- if (err == 0) {
- /* parent directories access/change time changed */
- sf_new_parent_i->force_restat = 1;
- sf_old_parent_i->force_restat = 1;
- }
-
- __putname(new_path);
-err_put_old_path:
- __putname(old_path);
- return err;
-}
-
-static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry,
- const char *symname)
-{
- struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
- struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
- int symname_size = strlen(symname) + 1;
- struct shfl_string *path, *ssymname;
- struct shfl_fsobjinfo info;
- int err;
-
- path = vboxsf_path_from_dentry(sbi, dentry);
- if (IS_ERR(path))
- return PTR_ERR(path);
-
- ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL);
- if (!ssymname) {
- __putname(path);
- return -ENOMEM;
- }
- ssymname->length = symname_size - 1;
- ssymname->size = symname_size;
- memcpy(ssymname->string.utf8, symname, symname_size);
-
- err = vboxsf_symlink(sbi->root, path, ssymname, &info);
- kfree(ssymname);
- __putname(path);
- if (err) {
- /* -EROFS means symlinks are not supported -> -EPERM */
- return (err == -EROFS) ? -EPERM : err;
- }
-
- err = vboxsf_dir_instantiate(parent, dentry, &info);
- if (err)
- return err;
-
- /* parent directory access/change time changed */
- sf_parent_i->force_restat = 1;
- return 0;
-}
-
-const struct inode_operations vboxsf_dir_iops = {
- .lookup = vboxsf_dir_lookup,
- .create = vboxsf_dir_mkfile,
- .mkdir = vboxsf_dir_mkdir,
- .rmdir = vboxsf_dir_unlink,
- .unlink = vboxsf_dir_unlink,
- .rename = vboxsf_dir_rename,
- .symlink = vboxsf_dir_symlink,
- .getattr = vboxsf_getattr,
- .setattr = vboxsf_setattr,
-};
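
vboxsf_dir_emit() in the deleted dir.c above walks buffers of struct shfl_dirinfo records that are packed back to back and may be unaligned, advancing by the header size plus the per-entry name size. A standalone sketch of that record-walking pattern, using a hypothetical struct var_rec rather than the real shfl types:

/* Step to the next variable-length record in a packed buffer.  The
 * arithmetic is done via uintptr_t because the records need not be
 * aligned for the struct type. */
struct var_rec {
	unsigned short name_size;	/* bytes reserved for name[] */
	char name[];
};

static struct var_rec *var_rec_next(struct var_rec *rec)
{
	size_t size = offsetof(struct var_rec, name) + rec->name_size;

	return (struct var_rec *)((uintptr_t)rec + size);
}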
diff --git a/drivers/staging/vboxsf/file.c b/drivers/staging/vboxsf/file.c
deleted file mode 100644
index 4b61ccf83fca..000000000000
--- a/drivers/staging/vboxsf/file.c
+++ /dev/null
@@ -1,370 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#include <linux/mm.h>
-#include <linux/page-flags.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/sizes.h>
-#include "vfsmod.h"
-
-struct vboxsf_handle {
- u64 handle;
- u32 root;
- u32 access_flags;
- struct kref refcount;
- struct list_head head;
-};
-
-static int vboxsf_file_open(struct inode *inode, struct file *file)
-{
- struct vboxsf_inode *sf_i = VBOXSF_I(inode);
- struct shfl_createparms params = {};
- struct vboxsf_handle *sf_handle;
- u32 access_flags = 0;
- int err;
-
- sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
- if (!sf_handle)
- return -ENOMEM;
-
- /*
- * We check the value of params.handle afterwards to find out if
- * the call succeeded or failed, as the API does not seem to cleanly
- * distinguish error and informational messages.
- *
- * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
- * make the shared folders host service use our mode parameter.
- */
- params.handle = SHFL_HANDLE_NIL;
- if (file->f_flags & O_CREAT) {
- params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
- /*
- * We ignore O_EXCL, as the Linux kernel seems to call create
- * beforehand itself, so O_EXCL should always fail.
- */
- if (file->f_flags & O_TRUNC)
- params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
- else
- params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
- } else {
- params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
- if (file->f_flags & O_TRUNC)
- params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
- }
-
- switch (file->f_flags & O_ACCMODE) {
- case O_RDONLY:
- access_flags |= SHFL_CF_ACCESS_READ;
- break;
-
- case O_WRONLY:
- access_flags |= SHFL_CF_ACCESS_WRITE;
- break;
-
- case O_RDWR:
- access_flags |= SHFL_CF_ACCESS_READWRITE;
- break;
-
- default:
- WARN_ON(1);
- }
-
- if (file->f_flags & O_APPEND)
- access_flags |= SHFL_CF_ACCESS_APPEND;
-
- params.create_flags |= access_flags;
- params.info.attr.mode = inode->i_mode;
-
- err = vboxsf_create_at_dentry(file_dentry(file), &params);
- if (err == 0 && params.handle == SHFL_HANDLE_NIL)
- err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
- if (err) {
- kfree(sf_handle);
- return err;
- }
-
- /* the host may have given us different attr than requested */
- sf_i->force_restat = 1;
-
- /* init our handle struct and add it to the inode's handles list */
- sf_handle->handle = params.handle;
- sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
- sf_handle->access_flags = access_flags;
- kref_init(&sf_handle->refcount);
-
- mutex_lock(&sf_i->handle_list_mutex);
- list_add(&sf_handle->head, &sf_i->handle_list);
- mutex_unlock(&sf_i->handle_list_mutex);
-
- file->private_data = sf_handle;
- return 0;
-}
-
-static void vboxsf_handle_release(struct kref *refcount)
-{
- struct vboxsf_handle *sf_handle =
- container_of(refcount, struct vboxsf_handle, refcount);
-
- vboxsf_close(sf_handle->root, sf_handle->handle);
- kfree(sf_handle);
-}
-
-static int vboxsf_file_release(struct inode *inode, struct file *file)
-{
- struct vboxsf_inode *sf_i = VBOXSF_I(inode);
- struct vboxsf_handle *sf_handle = file->private_data;
-
- /*
- * When a file is closed on our (the guest) side, we want any subsequent
- * accesses done on the host side to see all changes done from our side.
- */
- filemap_write_and_wait(inode->i_mapping);
-
- mutex_lock(&sf_i->handle_list_mutex);
- list_del(&sf_handle->head);
- mutex_unlock(&sf_i->handle_list_mutex);
-
- kref_put(&sf_handle->refcount, vboxsf_handle_release);
- return 0;
-}
-
-/*
- * Write back dirty pages now, because there may not be any suitable
- * open files later
- */
-static void vboxsf_vma_close(struct vm_area_struct *vma)
-{
- filemap_write_and_wait(vma->vm_file->f_mapping);
-}
-
-static const struct vm_operations_struct vboxsf_file_vm_ops = {
- .close = vboxsf_vma_close,
- .fault = filemap_fault,
- .map_pages = filemap_map_pages,
-};
-
-static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
- int err;
-
- err = generic_file_mmap(file, vma);
- if (!err)
- vma->vm_ops = &vboxsf_file_vm_ops;
-
- return err;
-}
-
-/*
- * Note that since we are accessing files on the host's filesystem, files
- * may always be changed underneath us by the host!
- *
- * The vboxsf API between the guest and the host does not offer any functions
- * to deal with this. There is no inode-generation to check for changes, no
- * events / callback on changes and no way to lock files.
- *
- * To avoid returning stale data when a file gets *opened* on our (the guest)
- * side, we do a "stat" on the host side, then compare the mtime with the
- * last known mtime and invalidate the page-cache if they differ.
- * This is done from vboxsf_inode_revalidate().
- *
- * When reads are done through the read_iter fop, it is possible to do
- * further cache revalidation then; there are 3 options to deal with this:
- *
- * 1) Rely solely on the revalidation done at open time
- * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf
- * host API does not allow stat on handles, so we would need to use
- * file->f_path.dentry and the stat will then fail if the file was unlinked
- *    or renamed (and there is no such thing as NFS' silly-rename). So we get:
- * 2a) "stat" and compare mtime, on stat failure invalidate the cache
- * 2b) "stat" and compare mtime, on stat failure do nothing
- * 3) Simply always call invalidate_inode_pages2_range on the range of the read
- *
- * Currently we are keeping things KISS and using option 1. This allows
- * directly using generic_file_read_iter without wrapping it.
- *
- * This means that only data written on the host side before open() on
- * the guest side is guaranteed to be seen by the guest. If necessary
- * we may provide other read-cache strategies in the future and make this
- * configurable through a mount option.
- */
-const struct file_operations vboxsf_reg_fops = {
- .llseek = generic_file_llseek,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .mmap = vboxsf_file_mmap,
- .open = vboxsf_file_open,
- .release = vboxsf_file_release,
- .fsync = noop_fsync,
- .splice_read = generic_file_splice_read,
-};
-
-const struct inode_operations vboxsf_reg_iops = {
- .getattr = vboxsf_getattr,
- .setattr = vboxsf_setattr
-};
-
-static int vboxsf_readpage(struct file *file, struct page *page)
-{
- struct vboxsf_handle *sf_handle = file->private_data;
- loff_t off = page_offset(page);
- u32 nread = PAGE_SIZE;
- u8 *buf;
- int err;
-
- buf = kmap(page);
-
- err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
- if (err == 0) {
- memset(&buf[nread], 0, PAGE_SIZE - nread);
- flush_dcache_page(page);
- SetPageUptodate(page);
- } else {
- SetPageError(page);
- }
-
- kunmap(page);
- unlock_page(page);
- return err;
-}
-
-static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
-{
- struct vboxsf_handle *h, *sf_handle = NULL;
-
- mutex_lock(&sf_i->handle_list_mutex);
- list_for_each_entry(h, &sf_i->handle_list, head) {
- if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
- h->access_flags == SHFL_CF_ACCESS_READWRITE) {
- kref_get(&h->refcount);
- sf_handle = h;
- break;
- }
- }
- mutex_unlock(&sf_i->handle_list_mutex);
-
- return sf_handle;
-}
-
-static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- struct vboxsf_inode *sf_i = VBOXSF_I(inode);
- struct vboxsf_handle *sf_handle;
- loff_t off = page_offset(page);
- loff_t size = i_size_read(inode);
- u32 nwrite = PAGE_SIZE;
- u8 *buf;
- int err;
-
- if (off + PAGE_SIZE > size)
- nwrite = size & ~PAGE_MASK;
-
- sf_handle = vboxsf_get_write_handle(sf_i);
- if (!sf_handle)
- return -EBADF;
-
- buf = kmap(page);
- err = vboxsf_write(sf_handle->root, sf_handle->handle,
- off, &nwrite, buf);
- kunmap(page);
-
- kref_put(&sf_handle->refcount, vboxsf_handle_release);
-
- if (err == 0) {
- ClearPageError(page);
- /* mtime changed */
- sf_i->force_restat = 1;
- } else {
- ClearPageUptodate(page);
- }
-
- unlock_page(page);
- return err;
-}
-
-static int vboxsf_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = mapping->host;
- struct vboxsf_handle *sf_handle = file->private_data;
- unsigned int from = pos & ~PAGE_MASK;
- u32 nwritten = len;
- u8 *buf;
- int err;
-
- buf = kmap(page);
- err = vboxsf_write(sf_handle->root, sf_handle->handle,
- pos, &nwritten, buf + from);
- kunmap(page);
-
- if (err) {
- nwritten = 0;
- goto out;
- }
-
- /* mtime changed */
- VBOXSF_I(inode)->force_restat = 1;
-
- if (!PageUptodate(page) && nwritten == PAGE_SIZE)
- SetPageUptodate(page);
-
- pos += nwritten;
- if (pos > inode->i_size)
- i_size_write(inode, pos);
-
-out:
- unlock_page(page);
- put_page(page);
-
- return nwritten;
-}
-
-const struct address_space_operations vboxsf_reg_aops = {
- .readpage = vboxsf_readpage,
- .writepage = vboxsf_writepage,
- .set_page_dirty = __set_page_dirty_nobuffers,
- .write_begin = simple_write_begin,
- .write_end = vboxsf_write_end,
-};
-
-static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
- struct delayed_call *done)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
- struct shfl_string *path;
- char *link;
- int err;
-
- if (!dentry)
- return ERR_PTR(-ECHILD);
-
- path = vboxsf_path_from_dentry(sbi, dentry);
- if (IS_ERR(path))
- return (char *)path;
-
- link = kzalloc(PATH_MAX, GFP_KERNEL);
- if (!link) {
- __putname(path);
- return ERR_PTR(-ENOMEM);
- }
-
- err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
- __putname(path);
- if (err) {
- kfree(link);
- return ERR_PTR(err);
- }
-
- set_delayed_call(done, kfree_link, link);
- return link;
-}
-
-const struct inode_operations vboxsf_lnk_iops = {
- .get_link = vboxsf_get_link
-};
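
The long comment above vboxsf_reg_fops in the deleted file.c explains why the driver only revalidates the page cache at open time (option 1). As a hedged sketch of what the discussed option 2a could look like, assuming a freshly fetched host mtime in nanoseconds; example_revalidate_mapping() and its parameter are hypothetical and not code the driver contains:

/* Drop cached pages when the host-side mtime no longer matches. */
static void example_revalidate_mapping(struct inode *inode, u64 host_mtime_ns)
{
	struct timespec64 mtime = ns_to_timespec64(host_mtime_ns);

	if (!timespec64_equal(&inode->i_mtime, &mtime)) {
		inode->i_mtime = mtime;
		invalidate_inode_pages2(inode->i_mapping);
	}
}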
diff --git a/drivers/staging/vboxsf/shfl_hostintf.h b/drivers/staging/vboxsf/shfl_hostintf.h
deleted file mode 100644
index aca829062c12..000000000000
--- a/drivers/staging/vboxsf/shfl_hostintf.h
+++ /dev/null
@@ -1,901 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * VirtualBox Shared Folders: host interface definition.
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#ifndef SHFL_HOSTINTF_H
-#define SHFL_HOSTINTF_H
-
-#include <linux/vbox_vmmdev_types.h>
-
-/* The max in/out buffer size for a FN_READ or FN_WRITE call */
-#define SHFL_MAX_RW_COUNT (16 * SZ_1M)
-
-/*
- * Structures shared between guest and the service
- * can be relocated and use offsets to point to variable
- * length parts.
- *
- * Shared folders protocol works with handles.
- * Before doing any action on a file system object,
- * one has to obtain the object handle via a SHFL_FN_CREATE
- * request. A handle must be closed with SHFL_FN_CLOSE.
- */
-
-enum {
- SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */
- SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */
- SHFL_FN_CREATE = 3, /* Open/create object. */
- SHFL_FN_CLOSE = 4, /* Close object handle. */
- SHFL_FN_READ = 5, /* Read object content. */
- SHFL_FN_WRITE = 6, /* Write new object content. */
- SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. */
- SHFL_FN_LIST = 8, /* List object content. */
- SHFL_FN_INFORMATION = 9, /* Query/set object information. */
- /* Note function number 10 is not used! */
- SHFL_FN_REMOVE = 11, /* Remove object */
- SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */
- SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */
- SHFL_FN_RENAME = 14, /* Rename object */
- SHFL_FN_FLUSH = 15, /* Flush file */
- SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */
- SHFL_FN_MAP_FOLDER = 17, /* Map folder */
- SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */
- SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */
- SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */
-};
-
-/* Root handles for a mapping are of type u32; root handles are unique. */
-#define SHFL_ROOT_NIL UINT_MAX
-
-/* Shared folder handles for an opened object are of type u64. */
-#define SHFL_HANDLE_NIL ULLONG_MAX
-
-/* Hardcoded maximum length (in chars) of a shared folder name. */
-#define SHFL_MAX_LEN (256)
-/* Hardcoded maximum number of shared folder mappings available to the guest. */
-#define SHFL_MAX_MAPPINGS (64)
-
-/** Shared folder string buffer structure. */
-struct shfl_string {
- /** Allocated size of the string member in bytes. */
- u16 size;
-
- /** Length of string without trailing nul in bytes. */
- u16 length;
-
- /** UTF-8 or UTF-16 string. Nul terminated. */
- union {
- u8 utf8[2];
- u16 utf16[1];
- u16 ucs2[1]; /* misnomer, use utf16. */
- } string;
-};
-VMMDEV_ASSERT_SIZE(shfl_string, 6);
-
-/* The size of shfl_string w/o the string part. */
-#define SHFLSTRING_HEADER_SIZE 4
-
-/* Calculate size of the string. */
-static inline u32 shfl_string_buf_size(const struct shfl_string *string)
-{
- return string ? SHFLSTRING_HEADER_SIZE + string->size : 0;
-}
-
-/* Set user id on execution (S_ISUID). */
-#define SHFL_UNIX_ISUID 0004000U
-/* Set group id on execution (S_ISGID). */
-#define SHFL_UNIX_ISGID 0002000U
-/* Sticky bit (S_ISVTX / S_ISTXT). */
-#define SHFL_UNIX_ISTXT 0001000U
-
-/* Owner readable (S_IRUSR). */
-#define SHFL_UNIX_IRUSR 0000400U
-/* Owner writable (S_IWUSR). */
-#define SHFL_UNIX_IWUSR 0000200U
-/* Owner executable (S_IXUSR). */
-#define SHFL_UNIX_IXUSR 0000100U
-
-/* Group readable (S_IRGRP). */
-#define SHFL_UNIX_IRGRP 0000040U
-/* Group writable (S_IWGRP). */
-#define SHFL_UNIX_IWGRP 0000020U
-/* Group executable (S_IXGRP). */
-#define SHFL_UNIX_IXGRP 0000010U
-
-/* Other readable (S_IROTH). */
-#define SHFL_UNIX_IROTH 0000004U
-/* Other writable (S_IWOTH). */
-#define SHFL_UNIX_IWOTH 0000002U
-/* Other executable (S_IXOTH). */
-#define SHFL_UNIX_IXOTH 0000001U
-
-/* Named pipe (fifo) (S_IFIFO). */
-#define SHFL_TYPE_FIFO 0010000U
-/* Character device (S_IFCHR). */
-#define SHFL_TYPE_DEV_CHAR 0020000U
-/* Directory (S_IFDIR). */
-#define SHFL_TYPE_DIRECTORY 0040000U
-/* Block device (S_IFBLK). */
-#define SHFL_TYPE_DEV_BLOCK 0060000U
-/* Regular file (S_IFREG). */
-#define SHFL_TYPE_FILE 0100000U
-/* Symbolic link (S_IFLNK). */
-#define SHFL_TYPE_SYMLINK 0120000U
-/* Socket (S_IFSOCK). */
-#define SHFL_TYPE_SOCKET 0140000U
-/* Whiteout (S_IFWHT). */
-#define SHFL_TYPE_WHITEOUT 0160000U
-/* Type mask (S_IFMT). */
-#define SHFL_TYPE_MASK 0170000U
-
-/* Checks the mode flags indicate a directory (S_ISDIR). */
-#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY)
-/* Checks the mode flags indicate a symbolic link (S_ISLNK). */
-#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK)
-
-/** The available additional information in a shfl_fsobjattr object. */
-enum shfl_fsobjattr_add {
- /** No additional information is available / requested. */
- SHFLFSOBJATTRADD_NOTHING = 1,
- /**
- * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are
- * available / requested.
- */
- SHFLFSOBJATTRADD_UNIX,
- /**
- * The additional extended attribute size (shfl_fsobjattr::u::size) is
- * available / requested.
- */
- SHFLFSOBJATTRADD_EASIZE,
- /**
- * The last valid item (inclusive).
- * The valid range is SHFLFSOBJATTRADD_NOTHING thru
- * SHFLFSOBJATTRADD_LAST.
- */
- SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE,
-
- /** The usual 32-bit hack. */
- SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff
-};
-
-/**
- * Additional unix Attributes, these are available when
- * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX.
- */
-struct shfl_fsobjattr_unix {
- /**
- * The user owning the filesystem object (st_uid).
- * This field is ~0U if not supported.
- */
- u32 uid;
-
- /**
- * The group the filesystem object is assigned (st_gid).
- * This field is ~0U if not supported.
- */
- u32 gid;
-
- /**
- * Number of hard links to this filesystem object (st_nlink).
- * This field is 1 if the filesystem doesn't support hardlinking or
- * the information isn't available.
- */
- u32 hardlinks;
-
- /**
- * The device number of the device which this filesystem object resides
- * on (st_dev). This field is 0 if this information is not available.
- */
- u32 inode_id_device;
-
- /**
- * The unique identifier (within the filesystem) of this filesystem
- * object (st_ino). Together with inode_id_device, this field can be
- * used as an OS-wide unique id, when both their values are not 0.
- * This field is 0 if the information is not available.
- */
- u64 inode_id;
-
- /**
- * User flags (st_flags).
- * This field is 0 if this information is not available.
- */
- u32 flags;
-
- /**
- * The current generation number (st_gen).
- * This field is 0 if this information is not available.
- */
- u32 generation_id;
-
- /**
- * The device number of a char. or block device type object (st_rdev).
- * This field is 0 if the file isn't a char. or block device or when
- * the OS doesn't use the major+minor device identification scheme.
- */
- u32 device;
-} __packed;
-
-/** Extended attribute size. */
-struct shfl_fsobjattr_easize {
- /** Size of EAs. */
- s64 cb;
-} __packed;
-
-/** Shared folder filesystem object attributes. */
-struct shfl_fsobjattr {
- /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */
- u32 mode;
-
- /** The additional attributes available. */
- enum shfl_fsobjattr_add additional;
-
- /**
- * Additional attributes.
- *
- * Unless explicitly specified to an API, the API can provide additional
- * data as it is provided by the underlying OS.
- */
- union {
- struct shfl_fsobjattr_unix unix_attr;
- struct shfl_fsobjattr_easize size;
- } __packed u;
-} __packed;
-VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44);
-
-struct shfl_timespec {
- s64 ns_relative_to_unix_epoch;
-};
-
-/** Filesystem object information structure. */
-struct shfl_fsobjinfo {
- /**
- * Logical size (st_size).
- * For normal files this is the size of the file.
- * For symbolic links, this is the length of the path name contained
- * in the symbolic link.
- * For other objects this field needs to be specified.
- */
- s64 size;
-
- /** Disk allocation size (st_blocks * DEV_BSIZE). */
- s64 allocated;
-
- /** Time of last access (st_atime). */
- struct shfl_timespec access_time;
-
- /** Time of last data modification (st_mtime). */
- struct shfl_timespec modification_time;
-
- /**
- * Time of last status change (st_ctime).
- * If not available this is set to modification_time.
- */
- struct shfl_timespec change_time;
-
- /**
- * Time of file birth (st_birthtime).
- * If not available this is set to change_time.
- */
- struct shfl_timespec birth_time;
-
- /** Attributes. */
- struct shfl_fsobjattr attr;
-
-} __packed;
-VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92);
-
-/**
- * result of an open/create request.
- * Along with handle value the result code
- * identifies what has happened while
- * trying to open the object.
- */
-enum shfl_create_result {
- SHFL_NO_RESULT,
- /** Specified path does not exist. */
- SHFL_PATH_NOT_FOUND,
- /** Path to file exists, but the last component does not. */
- SHFL_FILE_NOT_FOUND,
- /** File already exists and either has been opened or not. */
- SHFL_FILE_EXISTS,
- /** New file was created. */
- SHFL_FILE_CREATED,
- /** Existing file was replaced or overwritten. */
- SHFL_FILE_REPLACED
-};
-
-/* No flags. Initialization value. */
-#define SHFL_CF_NONE (0x00000000)
-
-/*
- * Only lookup the object, do not return a handle. When this is set all other
- * flags are ignored.
- */
-#define SHFL_CF_LOOKUP (0x00000001)
-
-/*
- * Open parent directory of specified object.
- * Useful for the corresponding Windows FSD flag
- * and for opening paths like \\dir\\*.* to search the 'dir'.
- */
-#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002)
-
-/* Create/open a directory. */
-#define SHFL_CF_DIRECTORY (0x00000004)
-
-/*
- * Open/create action to do if object exists
- * and if the object does not exists.
- * REPLACE file means atomically DELETE and CREATE.
- * OVERWRITE file means truncating the file to 0 and
- * setting new size.
- * When opening an existing directory REPLACE and OVERWRITE
- * actions are considered invalid, and cause returning
- * FILE_EXISTS with NIL handle.
- */
-#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0)
-#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00)
-
-/* What to do if object exists. */
-#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000)
-#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010)
-#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020)
-#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030)
-
-/* What to do if object does not exist. */
-#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000)
-#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100)
-
-/* Read/write requested access for the object. */
-#define SHFL_CF_ACCESS_MASK_RW (0x00003000)
-
-/* No access requested. */
-#define SHFL_CF_ACCESS_NONE (0x00000000)
-/* Read access requested. */
-#define SHFL_CF_ACCESS_READ (0x00001000)
-/* Write access requested. */
-#define SHFL_CF_ACCESS_WRITE (0x00002000)
-/* Read/Write access requested. */
-#define SHFL_CF_ACCESS_READWRITE (0x00003000)
-
-/* Requested share access for the object. */
-#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000)
-
-/* Allow any access. */
-#define SHFL_CF_ACCESS_DENYNONE (0x00000000)
-/* Do not allow read. */
-#define SHFL_CF_ACCESS_DENYREAD (0x00004000)
-/* Do not allow write. */
-#define SHFL_CF_ACCESS_DENYWRITE (0x00008000)
-/* Do not allow access. */
-#define SHFL_CF_ACCESS_DENYALL (0x0000c000)
-
-/* Requested access to attributes of the object. */
-#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000)
-
-/* No access requested. */
-#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000)
-/* Read access requested. */
-#define SHFL_CF_ACCESS_ATTR_READ (0x00010000)
-/* Write access requested. */
-#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000)
-/* Read/Write access requested. */
-#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000)
-
-/*
- * The file is opened in append mode.
- * Ignored if SHFL_CF_ACCESS_WRITE is not set.
- */
-#define SHFL_CF_ACCESS_APPEND (0x00040000)
-
-/** Create parameters buffer struct for SHFL_FN_CREATE call */
-struct shfl_createparms {
- /** Returned handle of opened object. */
- u64 handle;
-
- /** Returned result of the operation */
- enum shfl_create_result result;
-
- /** SHFL_CF_* */
- u32 create_flags;
-
- /**
- * Attributes of object to create and
- * returned actual attributes of opened/created object.
- */
- struct shfl_fsobjinfo info;
-} __packed;
-
-/** Shared Folder directory information */
-struct shfl_dirinfo {
- /** Full information about the object. */
- struct shfl_fsobjinfo info;
- /**
- * The length of the short field (number of UTF16 chars).
- * It is 16-bit for reasons of alignment.
- */
- u16 short_name_len;
- /**
- * The short name for 8.3 compatibility.
- * Empty string if not available.
- */
- u16 short_name[14];
- struct shfl_string name;
-};
-
-/** Shared folder filesystem properties. */
-struct shfl_fsproperties {
- /**
- * The maximum size of a filesystem object name.
- * This does not include the '\\0'.
- */
- u32 max_component_len;
-
- /**
- * True if the filesystem is remote.
- * False if the filesystem is local.
- */
- bool remote;
-
- /**
- * True if the filesystem is case sensitive.
- * False if the filesystem is case insensitive.
- */
- bool case_sensitive;
-
- /**
- * True if the filesystem is mounted read only.
- * False if the filesystem is mounted read write.
- */
- bool read_only;
-
- /**
- * True if the filesystem can encode unicode object names.
- * False if it can't.
- */
- bool supports_unicode;
-
- /**
- * True if the filesystem is compressed.
- * False if it isn't or we don't know.
- */
- bool compressed;
-
- /**
- * True if the filesystem supports compression of individual files.
- * False if it doesn't or we don't know.
- */
- bool file_compression;
-};
-VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12);
-
-struct shfl_volinfo {
- s64 total_allocation_bytes;
- s64 available_allocation_bytes;
- u32 bytes_per_allocation_unit;
- u32 bytes_per_sector;
- u32 serial;
- struct shfl_fsproperties properties;
-};
-
-
-/** SHFL_FN_MAP_FOLDER Parameters structure. */
-struct shfl_map_folder {
- /**
- * pointer, in:
- * Points to struct shfl_string buffer.
- */
- struct vmmdev_hgcm_function_parameter path;
-
- /**
- * pointer, out: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in: UTF16
- * Path delimiter
- */
- struct vmmdev_hgcm_function_parameter delimiter;
-
- /**
- * pointer, in: SHFLROOT (u32)
- * Case sensitive flag
- */
- struct vmmdev_hgcm_function_parameter case_sensitive;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_MAP_FOLDER (4)
-
-
-/** SHFL_FN_UNMAP_FOLDER Parameters structure. */
-struct shfl_unmap_folder {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_UNMAP_FOLDER (1)
-
-
-/** SHFL_FN_CREATE Parameters structure. */
-struct shfl_create {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in:
- * Points to struct shfl_string buffer.
- */
- struct vmmdev_hgcm_function_parameter path;
-
- /**
- * pointer, in/out:
- * Points to struct shfl_createparms buffer.
- */
- struct vmmdev_hgcm_function_parameter parms;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_CREATE (3)
-
-
-/** SHFL_FN_CLOSE Parameters structure. */
-struct shfl_close {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * value64, in:
- * SHFLHANDLE (u64) of object to close.
- */
- struct vmmdev_hgcm_function_parameter handle;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_CLOSE (2)
-
-
-/** SHFL_FN_READ Parameters structure. */
-struct shfl_read {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * value64, in:
- * SHFLHANDLE (u64) of object to read from.
- */
- struct vmmdev_hgcm_function_parameter handle;
-
- /**
- * value64, in:
- * Offset to read from.
- */
- struct vmmdev_hgcm_function_parameter offset;
-
- /**
- * value64, in/out:
- * Bytes to read/How many were read.
- */
- struct vmmdev_hgcm_function_parameter cb;
-
- /**
- * pointer, out:
- * Buffer to place data to.
- */
- struct vmmdev_hgcm_function_parameter buffer;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_READ (5)
-
-
-/** SHFL_FN_WRITE Parameters structure. */
-struct shfl_write {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * value64, in:
- * SHFLHANDLE (u64) of object to write to.
- */
- struct vmmdev_hgcm_function_parameter handle;
-
- /**
- * value64, in:
- * Offset to write to.
- */
- struct vmmdev_hgcm_function_parameter offset;
-
- /**
- * value64, in/out:
- * Bytes to write/How many were written.
- */
- struct vmmdev_hgcm_function_parameter cb;
-
- /**
- * pointer, in:
- * Data to write.
- */
- struct vmmdev_hgcm_function_parameter buffer;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_WRITE (5)
-
-
-/*
- * SHFL_FN_LIST
- * Listing information includes variable length RTDIRENTRY[EX] structures.
- */
-
-#define SHFL_LIST_NONE 0
-#define SHFL_LIST_RETURN_ONE 1
-
-/** SHFL_FN_LIST Parameters structure. */
-struct shfl_list {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * value64, in:
- * SHFLHANDLE (u64) of object to be listed.
- */
- struct vmmdev_hgcm_function_parameter handle;
-
- /**
- * value32, in:
- * List flags SHFL_LIST_*.
- */
- struct vmmdev_hgcm_function_parameter flags;
-
- /**
- * value32, in/out:
- * Bytes to be used for listing information/How many bytes were used.
- */
- struct vmmdev_hgcm_function_parameter cb;
-
- /**
- * pointer, in/optional
- * Points to struct shfl_string buffer that specifies a search path.
- */
- struct vmmdev_hgcm_function_parameter path;
-
- /**
- * pointer, out:
- * Buffer to place listing information to. (struct shfl_dirinfo)
- */
- struct vmmdev_hgcm_function_parameter buffer;
-
- /**
- * value32, in/out:
- * Indicates a key where the listing must be resumed.
- * in: 0 means start from the beginning of the object.
- * out: 0 means listing completed.
- */
- struct vmmdev_hgcm_function_parameter resume_point;
-
- /**
- * pointer, out:
- * Number of files returned
- */
- struct vmmdev_hgcm_function_parameter file_count;
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_LIST (8)
-
-
-/** SHFL_FN_READLINK Parameters structure. */
-struct shfl_readLink {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in:
- * Points to struct shfl_string buffer.
- */
- struct vmmdev_hgcm_function_parameter path;
-
- /**
- * pointer, out:
- * Buffer to place data to.
- */
- struct vmmdev_hgcm_function_parameter buffer;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_READLINK (3)
-
-
-/* SHFL_FN_INFORMATION */
-
-/* Mask of Set/Get bit. */
-#define SHFL_INFO_MODE_MASK (0x1)
-/* Get information */
-#define SHFL_INFO_GET (0x0)
-/* Set information */
-#define SHFL_INFO_SET (0x1)
-
-/* Get name of the object. */
-#define SHFL_INFO_NAME (0x2)
-/* Set size of object (extend/truncate); only applies to file objects */
-#define SHFL_INFO_SIZE (0x4)
-/* Get/Set file object info. */
-#define SHFL_INFO_FILE (0x8)
-/* Get volume information. */
-#define SHFL_INFO_VOLUME (0x10)
-
-/** SHFL_FN_INFORMATION Parameters structure. */
-struct shfl_information {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * value64, in:
- * SHFLHANDLE (u64) of object to be listed.
- */
- struct vmmdev_hgcm_function_parameter handle;
-
- /**
- * value32, in:
- * SHFL_INFO_*
- */
- struct vmmdev_hgcm_function_parameter flags;
-
- /**
- * value32, in/out:
- * Bytes to be used for information/How many bytes were used.
- */
- struct vmmdev_hgcm_function_parameter cb;
-
- /**
- * pointer, in/out:
- * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not
- * forget to set the shfl_fsobjinfo::attr::additional for a get
- * operation as well.
- */
- struct vmmdev_hgcm_function_parameter info;
-
-};
-
-/* Number of parameters */
-#define SHFL_CPARMS_INFORMATION (5)
-
-
-/* SHFL_FN_REMOVE */
-
-#define SHFL_REMOVE_FILE (0x1)
-#define SHFL_REMOVE_DIR (0x2)
-#define SHFL_REMOVE_SYMLINK (0x4)
-
-/** SHFL_FN_REMOVE Parameters structure. */
-struct shfl_remove {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in:
- * Points to struct shfl_string buffer.
- */
- struct vmmdev_hgcm_function_parameter path;
-
- /**
- * value32, in:
- * remove flags (file/directory)
- */
- struct vmmdev_hgcm_function_parameter flags;
-
-};
-
-#define SHFL_CPARMS_REMOVE (3)
-
-
-/* SHFL_FN_RENAME */
-
-#define SHFL_RENAME_FILE (0x1)
-#define SHFL_RENAME_DIR (0x2)
-#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4)
-
-/** SHFL_FN_RENAME Parameters structure. */
-struct shfl_rename {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in:
- * Points to struct shfl_string src.
- */
- struct vmmdev_hgcm_function_parameter src;
-
- /**
- * pointer, in:
- * Points to struct shfl_string dest.
- */
- struct vmmdev_hgcm_function_parameter dest;
-
- /**
- * value32, in:
- * rename flags (file/directory)
- */
- struct vmmdev_hgcm_function_parameter flags;
-
-};
-
-#define SHFL_CPARMS_RENAME (4)
-
-
-/** SHFL_FN_SYMLINK Parameters structure. */
-struct shfl_symlink {
- /**
- * pointer, in: SHFLROOT (u32)
- * Root handle of the mapping which name is queried.
- */
- struct vmmdev_hgcm_function_parameter root;
-
- /**
- * pointer, in:
- * Points to struct shfl_string of path for the new symlink.
- */
- struct vmmdev_hgcm_function_parameter new_path;
-
- /**
- * pointer, in:
- * Points to struct shfl_string of destination for symlink.
- */
- struct vmmdev_hgcm_function_parameter old_path;
-
- /**
- * pointer, out:
- * Information about created symlink.
- */
- struct vmmdev_hgcm_function_parameter info;
-
-};
-
-#define SHFL_CPARMS_SYMLINK (4)
-
-#endif
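
Several callers in the deleted files (vboxsf_dir_symlink(), vboxsf_fill_super()) build a struct shfl_string by hand: SHFLSTRING_HEADER_SIZE bytes of header, with size counting the NUL terminator and length excluding it. A minimal sketch of that convention (the helper name is illustrative):

/* Build a UTF-8 shfl_string from a C string; the caller kfree()s it. */
static struct shfl_string *example_shfl_string(const char *s)
{
	size_t size = strlen(s) + 1;	/* include the trailing NUL */
	struct shfl_string *str;

	str = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL);
	if (!str)
		return NULL;

	str->size = size;
	str->length = size - 1;
	memcpy(str->string.utf8, s, size);
	return str;
}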
diff --git a/drivers/staging/vboxsf/super.c b/drivers/staging/vboxsf/super.c
deleted file mode 100644
index 0bf4d724aefd..000000000000
--- a/drivers/staging/vboxsf/super.c
+++ /dev/null
@@ -1,501 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * VirtualBox Guest Shared Folders support: Virtual File System.
- *
- * Module initialization/finalization
- * File system registration/deregistration
- * Superblock reading
- * A few utility functions
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#include <linux/idr.h>
-#include <linux/fs_parser.h>
-#include <linux/magic.h>
-#include <linux/module.h>
-#include <linux/nls.h>
-#include <linux/statfs.h>
-#include <linux/vbox_utils.h>
-#include "vfsmod.h"
-
-#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
-
-#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000')
-#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377')
-#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376')
-#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375')
-
-static int follow_symlinks;
-module_param(follow_symlinks, int, 0444);
-MODULE_PARM_DESC(follow_symlinks,
- "Let host resolve symlinks rather than showing them");
-
-static DEFINE_IDA(vboxsf_bdi_ida);
-static DEFINE_MUTEX(vboxsf_setup_mutex);
-static bool vboxsf_setup_done;
-static struct super_operations vboxsf_super_ops; /* forward declaration */
-static struct kmem_cache *vboxsf_inode_cachep;
-
-static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT;
-
-enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode,
- opt_dmask, opt_fmask };
-
-static const struct fs_parameter_spec vboxsf_param_specs[] = {
- fsparam_string ("nls", opt_nls),
- fsparam_u32 ("uid", opt_uid),
- fsparam_u32 ("gid", opt_gid),
- fsparam_u32 ("ttl", opt_ttl),
- fsparam_u32oct ("dmode", opt_dmode),
- fsparam_u32oct ("fmode", opt_fmode),
- fsparam_u32oct ("dmask", opt_dmask),
- fsparam_u32oct ("fmask", opt_fmask),
- {}
-};
-
-static const struct fs_parameter_description vboxsf_fs_parameters = {
- .name = "vboxsf",
- .specs = vboxsf_param_specs,
-};
-
-static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
- struct vboxsf_fs_context *ctx = fc->fs_private;
- struct fs_parse_result result;
- kuid_t uid;
- kgid_t gid;
- int opt;
-
- opt = fs_parse(fc, &vboxsf_fs_parameters, param, &result);
- if (opt < 0)
- return opt;
-
- switch (opt) {
- case opt_nls:
- if (fc->purpose != FS_CONTEXT_FOR_MOUNT) {
- vbg_err("vboxsf: Cannot reconfigure nls option\n");
- return -EINVAL;
- }
- ctx->nls_name = param->string;
- param->string = NULL;
- break;
- case opt_uid:
- uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(uid))
- return -EINVAL;
- ctx->o.uid = uid;
- break;
- case opt_gid:
- gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(gid))
- return -EINVAL;
- ctx->o.gid = gid;
- break;
- case opt_ttl:
- ctx->o.ttl = msecs_to_jiffies(result.uint_32);
- break;
- case opt_dmode:
- if (result.uint_32 & ~0777)
- return -EINVAL;
- ctx->o.dmode = result.uint_32;
- ctx->o.dmode_set = true;
- break;
- case opt_fmode:
- if (result.uint_32 & ~0777)
- return -EINVAL;
- ctx->o.fmode = result.uint_32;
- ctx->o.fmode_set = true;
- break;
- case opt_dmask:
- if (result.uint_32 & ~07777)
- return -EINVAL;
- ctx->o.dmask = result.uint_32;
- break;
- case opt_fmask:
- if (result.uint_32 & ~07777)
- return -EINVAL;
- ctx->o.fmask = result.uint_32;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
-{
- struct vboxsf_fs_context *ctx = fc->fs_private;
- struct shfl_string *folder_name, root_path;
- struct vboxsf_sbi *sbi;
- struct dentry *droot;
- struct inode *iroot;
- char *nls_name;
- size_t size;
- int err;
-
- if (!fc->source)
- return -EINVAL;
-
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
-
- sbi->o = ctx->o;
- idr_init(&sbi->ino_idr);
- spin_lock_init(&sbi->ino_idr_lock);
- sbi->next_generation = 1;
- sbi->bdi_id = -1;
-
- /* Load nls if not utf8 */
- nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls;
- if (strcmp(nls_name, "utf8") != 0) {
- if (nls_name == vboxsf_default_nls)
- sbi->nls = load_nls_default();
- else
- sbi->nls = load_nls(nls_name);
-
- if (!sbi->nls) {
- vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
- err = -EINVAL;
- goto fail_free;
- }
- }
-
- sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
- if (sbi->bdi_id < 0) {
- err = sbi->bdi_id;
- goto fail_free;
- }
-
- err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
- if (err)
- goto fail_free;
-
- /* Turn source into a shfl_string and map the folder */
- size = strlen(fc->source) + 1;
- folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL);
- if (!folder_name) {
- err = -ENOMEM;
- goto fail_free;
- }
- folder_name->size = size;
- folder_name->length = size - 1;
- strlcpy(folder_name->string.utf8, fc->source, size);
- err = vboxsf_map_folder(folder_name, &sbi->root);
- kfree(folder_name);
- if (err) {
- vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n",
- fc->source, err);
- goto fail_free;
- }
-
- root_path.length = 1;
- root_path.size = 2;
- root_path.string.utf8[0] = '/';
- root_path.string.utf8[1] = 0;
- err = vboxsf_stat(sbi, &root_path, &sbi->root_info);
- if (err)
- goto fail_unmap;
-
- sb->s_magic = VBOXSF_SUPER_MAGIC;
- sb->s_blocksize = 1024;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_op = &vboxsf_super_ops;
- sb->s_d_op = &vboxsf_dentry_ops;
-
- iroot = iget_locked(sb, 0);
- if (!iroot) {
- err = -ENOMEM;
- goto fail_unmap;
- }
- vboxsf_init_inode(sbi, iroot, &sbi->root_info);
- unlock_new_inode(iroot);
-
- droot = d_make_root(iroot);
- if (!droot) {
- err = -ENOMEM;
- goto fail_unmap;
- }
-
- sb->s_root = droot;
- sb->s_fs_info = sbi;
- return 0;
-
-fail_unmap:
- vboxsf_unmap_folder(sbi->root);
-fail_free:
- if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
- if (sbi->nls)
- unload_nls(sbi->nls);
- idr_destroy(&sbi->ino_idr);
- kfree(sbi);
- return err;
-}
-
-static void vboxsf_inode_init_once(void *data)
-{
- struct vboxsf_inode *sf_i = data;
-
- mutex_init(&sf_i->handle_list_mutex);
- inode_init_once(&sf_i->vfs_inode);
-}
-
-static struct inode *vboxsf_alloc_inode(struct super_block *sb)
-{
- struct vboxsf_inode *sf_i;
-
- sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS);
- if (!sf_i)
- return NULL;
-
- sf_i->force_restat = 0;
- INIT_LIST_HEAD(&sf_i->handle_list);
-
- return &sf_i->vfs_inode;
-}
-
-static void vboxsf_free_inode(struct inode *inode)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
- unsigned long flags;
-
- spin_lock_irqsave(&sbi->ino_idr_lock, flags);
- idr_remove(&sbi->ino_idr, inode->i_ino);
- spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
- kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode));
-}
-
-static void vboxsf_put_super(struct super_block *sb)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
-
- vboxsf_unmap_folder(sbi->root);
- if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
- if (sbi->nls)
- unload_nls(sbi->nls);
-
- /*
- * vboxsf_free_inode uses the idr, make sure all delayed rcu free
- * inodes are flushed.
- */
- rcu_barrier();
- idr_destroy(&sbi->ino_idr);
- kfree(sbi);
-}
-
-static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat)
-{
- struct super_block *sb = dentry->d_sb;
- struct shfl_volinfo shfl_volinfo;
- struct vboxsf_sbi *sbi;
- u32 buf_len;
- int err;
-
- sbi = VBOXSF_SBI(sb);
- buf_len = sizeof(shfl_volinfo);
- err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME,
- &buf_len, &shfl_volinfo);
- if (err)
- return err;
-
- stat->f_type = VBOXSF_SUPER_MAGIC;
- stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit;
-
- do_div(shfl_volinfo.total_allocation_bytes,
- shfl_volinfo.bytes_per_allocation_unit);
- stat->f_blocks = shfl_volinfo.total_allocation_bytes;
-
- do_div(shfl_volinfo.available_allocation_bytes,
- shfl_volinfo.bytes_per_allocation_unit);
- stat->f_bfree = shfl_volinfo.available_allocation_bytes;
- stat->f_bavail = shfl_volinfo.available_allocation_bytes;
-
- stat->f_files = 1000;
- /*
- * Don't return 0 here since the guest may then think that it is not
- * possible to create any more files.
- */
- stat->f_ffree = 1000000;
- stat->f_fsid.val[0] = 0;
- stat->f_fsid.val[1] = 0;
- stat->f_namelen = 255;
- return 0;
-}
-
-static struct super_operations vboxsf_super_ops = {
- .alloc_inode = vboxsf_alloc_inode,
- .free_inode = vboxsf_free_inode,
- .put_super = vboxsf_put_super,
- .statfs = vboxsf_statfs,
-};
-
-static int vboxsf_setup(void)
-{
- int err;
-
- mutex_lock(&vboxsf_setup_mutex);
-
- if (vboxsf_setup_done)
- goto success;
-
- vboxsf_inode_cachep =
- kmem_cache_create("vboxsf_inode_cache",
- sizeof(struct vboxsf_inode), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
- SLAB_ACCOUNT),
- vboxsf_inode_init_once);
- if (!vboxsf_inode_cachep) {
- err = -ENOMEM;
- goto fail_nomem;
- }
-
- err = vboxsf_connect();
- if (err) {
- vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err);
- vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n");
- vbg_err("vboxsf: and check dmesg for vboxguest errors\n");
- goto fail_free_cache;
- }
-
- err = vboxsf_set_utf8();
- if (err) {
- vbg_err("vboxsf_setutf8 error %d\n", err);
- goto fail_disconnect;
- }
-
- if (!follow_symlinks) {
- err = vboxsf_set_symlinks();
- if (err)
- vbg_warn("vboxsf: Unable to show symlinks: %d\n", err);
- }
-
- vboxsf_setup_done = true;
-success:
- mutex_unlock(&vboxsf_setup_mutex);
- return 0;
-
-fail_disconnect:
- vboxsf_disconnect();
-fail_free_cache:
- kmem_cache_destroy(vboxsf_inode_cachep);
-fail_nomem:
- mutex_unlock(&vboxsf_setup_mutex);
- return err;
-}
-
-static int vboxsf_parse_monolithic(struct fs_context *fc, void *data)
-{
- char *options = data;
-
- if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 &&
- options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 &&
- options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 &&
- options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) {
- vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n");
- return -EINVAL;
- }
-
- return generic_parse_monolithic(fc, data);
-}
-
-static int vboxsf_get_tree(struct fs_context *fc)
-{
- int err;
-
- err = vboxsf_setup();
- if (err)
- return err;
-
- return vfs_get_super(fc, vfs_get_independent_super, vboxsf_fill_super);
-}
-
-static int vboxsf_reconfigure(struct fs_context *fc)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb);
- struct vboxsf_fs_context *ctx = fc->fs_private;
- struct inode *iroot;
-
- iroot = ilookup(fc->root->d_sb, 0);
- if (!iroot)
- return -ENOENT;
-
- /* Apply changed options to the root inode */
- sbi->o = ctx->o;
- vboxsf_init_inode(sbi, iroot, &sbi->root_info);
-
- return 0;
-}
-
-static void vboxsf_free_fc(struct fs_context *fc)
-{
- struct vboxsf_fs_context *ctx = fc->fs_private;
-
- kfree(ctx->nls_name);
- kfree(ctx);
-}
-
-static const struct fs_context_operations vboxsf_context_ops = {
- .free = vboxsf_free_fc,
- .parse_param = vboxsf_parse_param,
- .parse_monolithic = vboxsf_parse_monolithic,
- .get_tree = vboxsf_get_tree,
- .reconfigure = vboxsf_reconfigure,
-};
-
-static int vboxsf_init_fs_context(struct fs_context *fc)
-{
- struct vboxsf_fs_context *ctx;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- current_uid_gid(&ctx->o.uid, &ctx->o.gid);
-
- fc->fs_private = ctx;
- fc->ops = &vboxsf_context_ops;
- return 0;
-}
-
-static struct file_system_type vboxsf_fs_type = {
- .owner = THIS_MODULE,
- .name = "vboxsf",
- .init_fs_context = vboxsf_init_fs_context,
- .parameters = &vboxsf_fs_parameters,
- .kill_sb = kill_anon_super
-};
-
-/* Module initialization/finalization handlers */
-static int __init vboxsf_init(void)
-{
- return register_filesystem(&vboxsf_fs_type);
-}
-
-static void __exit vboxsf_fini(void)
-{
- unregister_filesystem(&vboxsf_fs_type);
-
- mutex_lock(&vboxsf_setup_mutex);
- if (vboxsf_setup_done) {
- vboxsf_disconnect();
- /*
- * Make sure all delayed rcu free inodes are flushed
- * before we destroy the cache.
- */
- rcu_barrier();
- kmem_cache_destroy(vboxsf_inode_cachep);
- }
- mutex_unlock(&vboxsf_setup_mutex);
-}
-
-module_init(vboxsf_init);
-module_exit(vboxsf_fini);
-
-MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access");
-MODULE_AUTHOR("Oracle Corporation");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_FS("vboxsf");
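
vboxsf_statfs() in the deleted super.c converts 64-bit byte totals into allocation-unit counts with do_div(), which divides in place and is used instead of the plain '/' operator so the code also links on 32-bit builds. A tiny sketch of that idiom (example_bytes_to_units() is illustrative):

/* Convert a byte count into allocation units; do_div() modifies its
 * first argument in place and returns the remainder. */
static u64 example_bytes_to_units(u64 bytes, u32 unit_size)
{
	do_div(bytes, unit_size);
	return bytes;
}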
diff --git a/drivers/staging/vboxsf/utils.c b/drivers/staging/vboxsf/utils.c
deleted file mode 100644
index 34a49e6f74fc..000000000000
--- a/drivers/staging/vboxsf/utils.c
+++ /dev/null
@@ -1,551 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * VirtualBox Guest Shared Folders support: Utility functions.
- * Mainly conversion from/to VirtualBox/Linux data structures.
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#include <linux/namei.h>
-#include <linux/nls.h>
-#include <linux/sizes.h>
-#include <linux/vfs.h>
-#include "vfsmod.h"
-
-struct inode *vboxsf_new_inode(struct super_block *sb)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
- struct inode *inode;
- unsigned long flags;
- int cursor, ret;
- u32 gen;
-
- inode = new_inode(sb);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&sbi->ino_idr_lock, flags);
- cursor = idr_get_cursor(&sbi->ino_idr);
- ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC);
- if (ret >= 0 && ret < cursor)
- sbi->next_generation++;
- gen = sbi->next_generation;
- spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
- idr_preload_end();
-
- if (ret < 0) {
- iput(inode);
- return ERR_PTR(ret);
- }
-
- inode->i_ino = ret;
- inode->i_generation = gen;
- return inode;
-}
-
-/* set [inode] attributes based on [info], uid/gid based on [sbi] */
-void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
- const struct shfl_fsobjinfo *info)
-{
- const struct shfl_fsobjattr *attr;
- s64 allocated;
- int mode;
-
- attr = &info->attr;
-
-#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0)
-
- mode = mode_set(IRUSR);
- mode |= mode_set(IWUSR);
- mode |= mode_set(IXUSR);
-
- mode |= mode_set(IRGRP);
- mode |= mode_set(IWGRP);
- mode |= mode_set(IXGRP);
-
- mode |= mode_set(IROTH);
- mode |= mode_set(IWOTH);
- mode |= mode_set(IXOTH);
-
-#undef mode_set
-
- /* We use the host-side values for these */
- inode->i_flags |= S_NOATIME | S_NOCMTIME;
- inode->i_mapping->a_ops = &vboxsf_reg_aops;
-
- if (SHFL_IS_DIRECTORY(attr->mode)) {
- inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode;
- inode->i_mode &= ~sbi->o.dmask;
- inode->i_mode |= S_IFDIR;
- inode->i_op = &vboxsf_dir_iops;
- inode->i_fop = &vboxsf_dir_fops;
- /*
- * XXX: this probably should be set to the number of entries
- * in the directory plus two (. ..)
- */
- set_nlink(inode, 1);
- } else if (SHFL_IS_SYMLINK(attr->mode)) {
- inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
- inode->i_mode &= ~sbi->o.fmask;
- inode->i_mode |= S_IFLNK;
- inode->i_op = &vboxsf_lnk_iops;
- set_nlink(inode, 1);
- } else {
- inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
- inode->i_mode &= ~sbi->o.fmask;
- inode->i_mode |= S_IFREG;
- inode->i_op = &vboxsf_reg_iops;
- inode->i_fop = &vboxsf_reg_fops;
- set_nlink(inode, 1);
- }
-
- inode->i_uid = sbi->o.uid;
- inode->i_gid = sbi->o.gid;
-
- inode->i_size = info->size;
- inode->i_blkbits = 12;
- /* i_blocks always in units of 512 bytes! */
- allocated = info->allocated + 511;
- do_div(allocated, 512);
- inode->i_blocks = allocated;
-
- inode->i_atime = ns_to_timespec64(
- info->access_time.ns_relative_to_unix_epoch);
- inode->i_ctime = ns_to_timespec64(
- info->change_time.ns_relative_to_unix_epoch);
- inode->i_mtime = ns_to_timespec64(
- info->modification_time.ns_relative_to_unix_epoch);
-}
-
-int vboxsf_create_at_dentry(struct dentry *dentry,
- struct shfl_createparms *params)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
- struct shfl_string *path;
- int err;
-
- path = vboxsf_path_from_dentry(sbi, dentry);
- if (IS_ERR(path))
- return PTR_ERR(path);
-
- err = vboxsf_create(sbi->root, path, params);
- __putname(path);
-
- return err;
-}
-
-int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
- struct shfl_fsobjinfo *info)
-{
- struct shfl_createparms params = {};
- int err;
-
- params.handle = SHFL_HANDLE_NIL;
- params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
-
- err = vboxsf_create(sbi->root, path, &params);
- if (err)
- return err;
-
- if (params.result != SHFL_FILE_EXISTS)
- return -ENOENT;
-
- if (info)
- *info = params.info;
-
- return 0;
-}
-
-int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info)
-{
- struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
- struct shfl_string *path;
- int err;
-
- path = vboxsf_path_from_dentry(sbi, dentry);
- if (IS_ERR(path))
- return PTR_ERR(path);
-
- err = vboxsf_stat(sbi, path, info);
- __putname(path);
- return err;
-}
-
-int vboxsf_inode_revalidate(struct dentry *dentry)
-{
- struct vboxsf_sbi *sbi;
- struct vboxsf_inode *sf_i;
- struct shfl_fsobjinfo info;
- struct timespec64 prev_mtime;
- struct inode *inode;
- int err;
-
- if (!dentry || !d_really_is_positive(dentry))
- return -EINVAL;
-
- inode = d_inode(dentry);
- prev_mtime = inode->i_mtime;
- sf_i = VBOXSF_I(inode);
- sbi = VBOXSF_SBI(dentry->d_sb);
- if (!sf_i->force_restat) {
- if (time_before(jiffies, dentry->d_time + sbi->o.ttl))
- return 0;
- }
-
- err = vboxsf_stat_dentry(dentry, &info);
- if (err)
- return err;
-
- dentry->d_time = jiffies;
- sf_i->force_restat = 0;
- vboxsf_init_inode(sbi, inode, &info);
-
- /*
- * If the file was changed on the host side we need to invalidate the
- * page-cache for it. Note this also gets triggered by our own writes,
- * this is unavoidable.
- */
- if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0)
- invalidate_inode_pages2(inode->i_mapping);
-
- return 0;
-}
-
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
- u32 request_mask, unsigned int flags)
-{
- int err;
- struct dentry *dentry = path->dentry;
- struct inode *inode = d_inode(dentry);
- struct vboxsf_inode *sf_i = VBOXSF_I(inode);
-
- switch (flags & AT_STATX_SYNC_TYPE) {
- case AT_STATX_DONT_SYNC:
- err = 0;
- break;
- case AT_STATX_FORCE_SYNC:
- sf_i->force_restat = 1;
- /* fall-through */
- default:
- err = vboxsf_inode_revalidate(dentry);
- }
- if (err)
- return err;
-
- generic_fillattr(d_inode(dentry), kstat);
- return 0;
-}
-
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr)
-{
- struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
- struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
- struct shfl_createparms params = {};
- struct shfl_fsobjinfo info = {};
- u32 buf_len;
- int err;
-
- params.handle = SHFL_HANDLE_NIL;
- params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS |
- SHFL_CF_ACT_FAIL_IF_NEW |
- SHFL_CF_ACCESS_ATTR_WRITE;
-
-	/* this is at least required for POSIX hosts */
- if (iattr->ia_valid & ATTR_SIZE)
- params.create_flags |= SHFL_CF_ACCESS_WRITE;
-
- err = vboxsf_create_at_dentry(dentry, &params);
- if (err || params.result != SHFL_FILE_EXISTS)
- return err ? err : -ENOENT;
-
-#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0)
-
- /*
- * Setting the file size and setting the other attributes has to
- * be handled separately.
- */
- if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) {
- if (iattr->ia_valid & ATTR_MODE) {
- info.attr.mode = mode_set(IRUSR);
- info.attr.mode |= mode_set(IWUSR);
- info.attr.mode |= mode_set(IXUSR);
- info.attr.mode |= mode_set(IRGRP);
- info.attr.mode |= mode_set(IWGRP);
- info.attr.mode |= mode_set(IXGRP);
- info.attr.mode |= mode_set(IROTH);
- info.attr.mode |= mode_set(IWOTH);
- info.attr.mode |= mode_set(IXOTH);
-
- if (iattr->ia_mode & S_IFDIR)
- info.attr.mode |= SHFL_TYPE_DIRECTORY;
- else
- info.attr.mode |= SHFL_TYPE_FILE;
- }
-
- if (iattr->ia_valid & ATTR_ATIME)
- info.access_time.ns_relative_to_unix_epoch =
- timespec64_to_ns(&iattr->ia_atime);
-
- if (iattr->ia_valid & ATTR_MTIME)
- info.modification_time.ns_relative_to_unix_epoch =
- timespec64_to_ns(&iattr->ia_mtime);
-
- /*
- * Ignore ctime (inode change time) as it can't be set
- * from userland anyway.
- */
-
- buf_len = sizeof(info);
- err = vboxsf_fsinfo(sbi->root, params.handle,
- SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len,
- &info);
- if (err) {
- vboxsf_close(sbi->root, params.handle);
- return err;
- }
-
-		/* the host may have given us different attributes than requested */
- sf_i->force_restat = 1;
- }
-
-#undef mode_set
-
- if (iattr->ia_valid & ATTR_SIZE) {
- memset(&info, 0, sizeof(info));
- info.size = iattr->ia_size;
- buf_len = sizeof(info);
- err = vboxsf_fsinfo(sbi->root, params.handle,
- SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len,
- &info);
- if (err) {
- vboxsf_close(sbi->root, params.handle);
- return err;
- }
-
-		/* the host may have given us different attributes than requested */
- sf_i->force_restat = 1;
- }
-
- vboxsf_close(sbi->root, params.handle);
-
- /* Update the inode with what the host has actually given us. */
- if (sf_i->force_restat)
- vboxsf_inode_revalidate(dentry);
-
- return 0;
-}
-
-/*
- * [dentry] contains a string encoded in the coding system that corresponds
- * to [sbi]->nls; we must convert it to UTF-8 here.
- * Returns a shfl_string allocated through __getname (must be freed using
- * __putname), or an ERR_PTR on error.
- */
-struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
- struct dentry *dentry)
-{
- struct shfl_string *shfl_path;
- int path_len, out_len, nb;
- char *buf, *path;
- wchar_t uni;
- u8 *out;
-
- buf = __getname();
- if (!buf)
- return ERR_PTR(-ENOMEM);
-
- path = dentry_path_raw(dentry, buf, PATH_MAX);
- if (IS_ERR(path)) {
- __putname(buf);
- return (struct shfl_string *)path;
- }
- path_len = strlen(path);
-
- if (sbi->nls) {
- shfl_path = __getname();
- if (!shfl_path) {
- __putname(buf);
- return ERR_PTR(-ENOMEM);
- }
-
- out = shfl_path->string.utf8;
- out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1;
-
- while (path_len) {
- nb = sbi->nls->char2uni(path, path_len, &uni);
- if (nb < 0) {
- __putname(shfl_path);
- __putname(buf);
- return ERR_PTR(-EINVAL);
- }
- path += nb;
- path_len -= nb;
-
- nb = utf32_to_utf8(uni, out, out_len);
- if (nb < 0) {
- __putname(shfl_path);
- __putname(buf);
- return ERR_PTR(-ENAMETOOLONG);
- }
- out += nb;
- out_len -= nb;
- }
- *out = 0;
- shfl_path->length = out - shfl_path->string.utf8;
- shfl_path->size = shfl_path->length + 1;
- __putname(buf);
- } else {
- if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) {
- __putname(buf);
- return ERR_PTR(-ENAMETOOLONG);
- }
- /*
- * dentry_path stores the name at the end of buf, but the
- * shfl_string string we return must be properly aligned.
- */
- shfl_path = (struct shfl_string *)buf;
- memmove(shfl_path->string.utf8, path, path_len);
- shfl_path->string.utf8[path_len] = 0;
- shfl_path->length = path_len;
- shfl_path->size = path_len + 1;
- }
-
- return shfl_path;
-}
-
-int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
- const unsigned char *utf8_name, size_t utf8_len)
-{
- const char *in;
- char *out;
- size_t out_len;
- size_t out_bound_len;
- size_t in_bound_len;
-
- in = utf8_name;
- in_bound_len = utf8_len;
-
- out = name;
- out_len = 0;
- /* Reserve space for terminating 0 */
- out_bound_len = name_bound_len - 1;
-
- while (in_bound_len) {
- int nb;
- unicode_t uni;
-
- nb = utf8_to_utf32(in, in_bound_len, &uni);
- if (nb < 0)
- return -EINVAL;
-
- in += nb;
- in_bound_len -= nb;
-
- nb = sbi->nls->uni2char(uni, out, out_bound_len);
- if (nb < 0)
- return nb;
-
- out += nb;
- out_bound_len -= nb;
- out_len += nb;
- }
-
- *out = 0;
-
- return 0;
-}
-
-static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list)
-{
- struct vboxsf_dir_buf *b;
-
- b = kmalloc(sizeof(*b), GFP_KERNEL);
- if (!b)
- return NULL;
-
- b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL);
- if (!b->buf) {
- kfree(b);
- return NULL;
- }
-
- b->entries = 0;
- b->used = 0;
- b->free = DIR_BUFFER_SIZE;
- list_add(&b->head, list);
-
- return b;
-}
-
-static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b)
-{
- list_del(&b->head);
- kfree(b->buf);
- kfree(b);
-}
-
-struct vboxsf_dir_info *vboxsf_dir_info_alloc(void)
-{
- struct vboxsf_dir_info *p;
-
- p = kmalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return NULL;
-
- INIT_LIST_HEAD(&p->info_list);
- return p;
-}
-
-void vboxsf_dir_info_free(struct vboxsf_dir_info *p)
-{
- struct list_head *list, *pos, *tmp;
-
- list = &p->info_list;
- list_for_each_safe(pos, tmp, list) {
- struct vboxsf_dir_buf *b;
-
- b = list_entry(pos, struct vboxsf_dir_buf, head);
- vboxsf_dir_buf_free(b);
- }
- kfree(p);
-}
-
-int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
- u64 handle)
-{
- struct vboxsf_dir_buf *b;
- u32 entries, size;
- int err = 0;
- void *buf;
-
- /* vboxsf_dirinfo returns 1 on end of dir */
- while (err == 0) {
- b = vboxsf_dir_buf_alloc(&sf_d->info_list);
- if (!b) {
- err = -ENOMEM;
- break;
- }
-
- buf = b->buf;
- size = b->free;
-
- err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0,
- &size, buf, &entries);
- if (err < 0)
- break;
-
- b->entries += entries;
- b->free -= size;
- b->used += size;
- }
-
- if (b && b->used == 0)
- vboxsf_dir_buf_free(b);
-
- /* -EILSEQ means the host could not translate a filename, ignore */
- if (err > 0 || err == -EILSEQ)
- err = 0;
-
- return err;
-}
diff --git a/drivers/staging/vboxsf/vboxsf_wrappers.c b/drivers/staging/vboxsf/vboxsf_wrappers.c
deleted file mode 100644
index bfc78a097dae..000000000000
--- a/drivers/staging/vboxsf/vboxsf_wrappers.c
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Wrapper functions for the shfl host calls.
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/vbox_err.h>
-#include <linux/vbox_utils.h>
-#include "vfsmod.h"
-
-#define SHFL_REQUEST \
- (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \
- VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN)
-
-static u32 vboxsf_client_id;
-
-int vboxsf_connect(void)
-{
- struct vbg_dev *gdev;
- struct vmmdev_hgcm_service_location loc;
- int err, vbox_status;
-
- loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING;
- strcpy(loc.u.localhost.service_name, "VBoxSharedFolders");
-
- gdev = vbg_get_gdev();
- if (IS_ERR(gdev))
- return -ENODEV; /* No guest-device */
-
- err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc,
- &vboxsf_client_id, &vbox_status);
- vbg_put_gdev(gdev);
-
- return err ? err : vbg_status_code_to_errno(vbox_status);
-}
-
-void vboxsf_disconnect(void)
-{
- struct vbg_dev *gdev;
- int vbox_status;
-
- gdev = vbg_get_gdev();
- if (IS_ERR(gdev))
- return; /* guest-device is gone, already disconnected */
-
- vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status);
- vbg_put_gdev(gdev);
-}
-
-static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status)
-{
- struct vbg_dev *gdev;
- int err, vbox_status;
-
- gdev = vbg_get_gdev();
- if (IS_ERR(gdev))
- return -ESHUTDOWN; /* guest-dev removed underneath us */
-
- err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function,
- U32_MAX, parms, parm_count, &vbox_status);
- vbg_put_gdev(gdev);
-
- if (err < 0)
- return err;
-
- if (status)
- *status = vbox_status;
-
- return vbg_status_code_to_errno(vbox_status);
-}
-
-int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root)
-{
- struct shfl_map_folder parms;
- int err, status;
-
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
- parms.path.u.pointer.size = shfl_string_buf_size(folder_name);
- parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = 0;
-
- parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.delimiter.u.value32 = '/';
-
- parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.case_sensitive.u.value32 = 1;
-
- err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER,
- &status);
- if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED)
- vbg_err("%s: Error host is too old\n", __func__);
-
- *root = parms.root.u.value32;
- return err;
-}
-
-int vboxsf_unmap_folder(u32 root)
-{
- struct shfl_unmap_folder parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms,
- SHFL_CPARMS_UNMAP_FOLDER, NULL);
-}
-
-/**
- * vboxsf_create - Create a new file or folder
- * @root: Root of the shared folder in which to create the file
- * @parsed_path: The path of the file or folder relative to the shared folder
- * @create_parms: Parameters for file/folder creation.
- *
- * Create a new file or folder or open an existing one in a shared folder.
- * Note this function always returns 0 / success unless an exceptional condition
- * occurs - out of memory, invalid arguments, etc. If the file or folder could
- * not be opened or created, create_parms->handle will be set to
- * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result
- * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result
- * is also set on success as additional information.
- *
- * Returns:
- * 0 or negative errno value.
- */
-int vboxsf_create(u32 root, struct shfl_string *parsed_path,
- struct shfl_createparms *create_parms)
-{
- struct shfl_create parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
- parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
- parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
-
- parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
- parms.parms.u.pointer.size = sizeof(struct shfl_createparms);
- parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms;
-
- return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL);
-}
-
-int vboxsf_close(u32 root, u64 handle)
-{
- struct shfl_close parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.handle.u.value64 = handle;
-
- return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL);
-}
-
-int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags)
-{
- struct shfl_remove parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
- parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
-
- parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.flags.u.value32 = flags;
-
- return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL);
-}
-
-int vboxsf_rename(u32 root, struct shfl_string *src_path,
- struct shfl_string *dest_path, u32 flags)
-{
- struct shfl_rename parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.src.u.pointer.size = shfl_string_buf_size(src_path);
- parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path;
-
- parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.dest.u.pointer.size = shfl_string_buf_size(dest_path);
- parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path;
-
- parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.flags.u.value32 = flags;
-
- return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL);
-}
-
-int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
-{
- struct shfl_read parms;
- int err;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.handle.u.value64 = handle;
- parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.offset.u.value64 = offset;
- parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.cb.u.value32 = *buf_len;
- parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
- parms.buffer.u.pointer.size = *buf_len;
- parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL);
-
- *buf_len = parms.cb.u.value32;
- return err;
-}
-
-int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
-{
- struct shfl_write parms;
- int err;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.handle.u.value64 = handle;
- parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.offset.u.value64 = offset;
- parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.cb.u.value32 = *buf_len;
- parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.buffer.u.pointer.size = *buf_len;
- parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL);
-
- *buf_len = parms.cb.u.value32;
- return err;
-}
-
-/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */
-int vboxsf_dirinfo(u32 root, u64 handle,
- struct shfl_string *parsed_path, u32 flags, u32 index,
- u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count)
-{
- struct shfl_list parms;
- int err, status;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.handle.u.value64 = handle;
- parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.flags.u.value32 = flags;
- parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.cb.u.value32 = *buf_len;
- if (parsed_path) {
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
- parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
- } else {
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN;
- parms.path.u.pointer.size = 0;
- parms.path.u.pointer.u.linear_addr = 0;
- }
-
- parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
- parms.buffer.u.pointer.size = *buf_len;
- parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.resume_point.u.value32 = index;
- parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.file_count.u.value32 = 0; /* out parameter only */
-
- err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status);
- if (err == -ENODATA && status == VERR_NO_MORE_FILES)
- err = 1;
-
- *buf_len = parms.cb.u.value32;
- *file_count = parms.file_count.u.value32;
- return err;
-}
-
-int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
- u32 *buf_len, void *buf)
-{
- struct shfl_information parms;
- int err;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
- parms.handle.u.value64 = handle;
- parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.flags.u.value32 = flags;
- parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.cb.u.value32 = *buf_len;
- parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
- parms.info.u.pointer.size = *buf_len;
- parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION,
- NULL);
-
- *buf_len = parms.cb.u.value32;
- return err;
-}
-
-int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
- u32 buf_len, u8 *buf)
-{
- struct shfl_readLink parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
- parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
-
- parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
- parms.buffer.u.pointer.size = buf_len;
- parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK,
- NULL);
-}
-
-int vboxsf_symlink(u32 root, struct shfl_string *new_path,
- struct shfl_string *old_path, struct shfl_fsobjinfo *buf)
-{
- struct shfl_symlink parms;
-
- parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
- parms.root.u.value32 = root;
-
- parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.new_path.u.pointer.size = shfl_string_buf_size(new_path);
- parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path;
-
- parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
- parms.old_path.u.pointer.size = shfl_string_buf_size(old_path);
- parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path;
-
- parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
- parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo);
- parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
-
- return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL);
-}
-
-int vboxsf_set_utf8(void)
-{
- return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL);
-}
-
-int vboxsf_set_symlinks(void)
-{
- return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL);
-}
diff --git a/drivers/staging/vboxsf/vfsmod.h b/drivers/staging/vboxsf/vfsmod.h
deleted file mode 100644
index 18f95b00fc33..000000000000
--- a/drivers/staging/vboxsf/vfsmod.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * VirtualBox Guest Shared Folders support: module header.
- *
- * Copyright (C) 2006-2018 Oracle Corporation
- */
-
-#ifndef VFSMOD_H
-#define VFSMOD_H
-
-#include <linux/backing-dev.h>
-#include <linux/idr.h>
-#include "shfl_hostintf.h"
-
-#define DIR_BUFFER_SIZE SZ_16K
-
-/* The cast is to prevent assignment of void * to pointers of arbitrary type */
-#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info)
-#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode)
-
-struct vboxsf_options {
- unsigned long ttl;
- kuid_t uid;
- kgid_t gid;
- bool dmode_set;
- bool fmode_set;
- umode_t dmode;
- umode_t fmode;
- umode_t dmask;
- umode_t fmask;
-};
-
-struct vboxsf_fs_context {
- struct vboxsf_options o;
- char *nls_name;
-};
-
-/* per-shared folder information */
-struct vboxsf_sbi {
- struct vboxsf_options o;
- struct shfl_fsobjinfo root_info;
- struct idr ino_idr;
- spinlock_t ino_idr_lock; /* This protects ino_idr */
- struct nls_table *nls;
- u32 next_generation;
- u32 root;
- int bdi_id;
-};
-
-/* per-inode information */
-struct vboxsf_inode {
- /* some information was changed, update data on next revalidate */
- int force_restat;
- /* list of open handles for this inode + lock protecting it */
- struct list_head handle_list;
- /* This mutex protects handle_list accesses */
- struct mutex handle_list_mutex;
- /* The VFS inode struct */
- struct inode vfs_inode;
-};
-
-struct vboxsf_dir_info {
- struct list_head info_list;
-};
-
-struct vboxsf_dir_buf {
- size_t entries;
- size_t free;
- size_t used;
- void *buf;
- struct list_head head;
-};
-
-/* globals */
-extern const struct inode_operations vboxsf_dir_iops;
-extern const struct inode_operations vboxsf_lnk_iops;
-extern const struct inode_operations vboxsf_reg_iops;
-extern const struct file_operations vboxsf_dir_fops;
-extern const struct file_operations vboxsf_reg_fops;
-extern const struct address_space_operations vboxsf_reg_aops;
-extern const struct dentry_operations vboxsf_dentry_ops;
-
-/* from utils.c */
-struct inode *vboxsf_new_inode(struct super_block *sb);
-void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
- const struct shfl_fsobjinfo *info);
-int vboxsf_create_at_dentry(struct dentry *dentry,
- struct shfl_createparms *params);
-int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
- struct shfl_fsobjinfo *info);
-int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
-int vboxsf_inode_revalidate(struct dentry *dentry);
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
- u32 request_mask, unsigned int query_flags);
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr);
-struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
- struct dentry *dentry);
-int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
- const unsigned char *utf8_name, size_t utf8_len);
-struct vboxsf_dir_info *vboxsf_dir_info_alloc(void);
-void vboxsf_dir_info_free(struct vboxsf_dir_info *p);
-int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
- u64 handle);
-
-/* from vboxsf_wrappers.c */
-int vboxsf_connect(void);
-void vboxsf_disconnect(void);
-
-int vboxsf_create(u32 root, struct shfl_string *parsed_path,
- struct shfl_createparms *create_parms);
-
-int vboxsf_close(u32 root, u64 handle);
-int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags);
-int vboxsf_rename(u32 root, struct shfl_string *src_path,
- struct shfl_string *dest_path, u32 flags);
-
-int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
-int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
-
-int vboxsf_dirinfo(u32 root, u64 handle,
- struct shfl_string *parsed_path, u32 flags, u32 index,
- u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count);
-int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
- u32 *buf_len, void *buf);
-
-int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root);
-int vboxsf_unmap_folder(u32 root);
-
-int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
- u32 buf_len, u8 *buf);
-int vboxsf_symlink(u32 root, struct shfl_string *new_path,
- struct shfl_string *old_path, struct shfl_fsobjinfo *buf);
-
-int vboxsf_set_utf8(void);
-int vboxsf_set_symlinks(void);
-
-#endif
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index cc12772d0a4d..497f979018c2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -803,7 +803,12 @@ success:
continue;
if (cookie->inodes[i]) {
- afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]),
+ struct afs_vnode *iv = AFS_FS_I(cookie->inodes[i]);
+
+ if (test_bit(AFS_VNODE_UNSET, &iv->flags))
+ continue;
+
+ afs_vnode_commit_status(&fc, iv,
scb->cb_break, NULL, scb);
continue;
}
diff --git a/fs/aio.c b/fs/aio.c
index 01e0fb9ae45a..0d9a559d488c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2179,7 +2179,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
#ifdef CONFIG_COMPAT
struct __compat_aio_sigset {
- compat_sigset_t __user *sigmask;
+ compat_uptr_t sigmask;
compat_size_t sigsetsize;
};
@@ -2193,7 +2193,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
struct old_timespec32 __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
- struct __compat_aio_sigset ksig = { NULL, };
+ struct __compat_aio_sigset ksig = { 0, };
struct timespec64 t;
bool interrupted;
int ret;
@@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
if (ret)
return ret;
@@ -2228,7 +2228,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
struct __kernel_timespec __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
- struct __compat_aio_sigset ksig = { NULL, };
+ struct __compat_aio_sigset ksig = { 0, };
struct timespec64 t;
bool interrupted;
int ret;
@@ -2239,7 +2239,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
if (ret)
return ret;
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index 2866fabf497f..91f5787dae7c 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -459,9 +459,10 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
*/
how &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
- if (!found || found != expired)
- /* Something has changed, continue */
+ if (found != expired) { // something has changed, continue
+ dput(found);
goto next;
+ }
if (expired != dentry)
dput(dentry);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6dc4dd16cf7..015910079e73 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9744,6 +9744,18 @@ out_fail:
commit_transaction = true;
}
if (commit_transaction) {
+ /*
+ * We may have set commit_transaction when logging the new name
+ * in the destination root, in which case we left the source
+ * root context in the list of log contextes. So make sure we
+		 * root context in the list of log contexts. So make sure we
+ * was allocated in our stack frame.
+ */
+ if (sync_log_root) {
+ mutex_lock(&root->log_mutex);
+ list_del_init(&ctx_root.list);
+ mutex_unlock(&root->log_mutex);
+ }
ret = btrfs_commit_transaction(trans);
} else {
int ret2;
@@ -9757,6 +9769,9 @@ out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
+ ASSERT(list_empty(&ctx_root.list));
+ ASSERT(list_empty(&ctx_dest.list));
+
return ret;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bd77adb64bfd..8de633964dc3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -753,6 +753,9 @@ static void ceph_aio_complete(struct inode *inode,
if (!atomic_dec_and_test(&aio_req->pending_reqs))
return;
+ if (aio_req->iocb->ki_flags & IOCB_DIRECT)
+ inode_dio_end(inode);
+
ret = aio_req->error;
if (!ret)
ret = aio_req->total_len;
@@ -1091,6 +1094,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
CEPH_CAP_FILE_RD);
list_splice(&aio_req->osd_reqs, &osd_reqs);
+ inode_dio_begin(inode);
while (!list_empty(&osd_reqs)) {
req = list_first_entry(&osd_reqs,
struct ceph_osd_request,
@@ -1264,14 +1268,24 @@ again:
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_start_io_direct(inode);
+ else
+ ceph_start_io_read(inode);
+
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
&got, &pinned_page);
- if (ret < 0)
+ if (ret < 0) {
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_end_io_direct(inode);
+ else
+ ceph_end_io_read(inode);
return ret;
+ }
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_flags & IOCB_DIRECT) ||
@@ -1283,16 +1297,12 @@ again:
if (ci->i_inline_version == CEPH_INLINE_NONE) {
if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
- ceph_start_io_direct(inode);
ret = ceph_direct_read_write(iocb, to,
NULL, NULL);
- ceph_end_io_direct(inode);
if (ret >= 0 && ret < len)
retry_op = CHECK_EOF;
} else {
- ceph_start_io_read(inode);
ret = ceph_sync_read(iocb, to, &retry_op);
- ceph_end_io_read(inode);
}
} else {
retry_op = READ_INLINE;
@@ -1303,11 +1313,10 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
ceph_add_rw_context(fi, &rw_ctx);
- ceph_start_io_read(inode);
ret = generic_file_read_iter(iocb, to);
- ceph_end_io_read(inode);
ceph_del_rw_context(fi, &rw_ctx);
}
+
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
if (pinned_page) {
@@ -1315,6 +1324,12 @@ again:
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
+
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_end_io_direct(inode);
+ else
+ ceph_end_io_read(inode);
+
if (retry_op > HAVE_RETRIED && ret >= 0) {
int statret;
struct page *page = NULL;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 18426f4855f1..e23752d9a79f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -128,13 +128,20 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
struct dentry *lower_dir_dentry;
+ struct inode *lower_dir_inode;
int rc;
- dget(lower_dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+ lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+ lower_dir_inode = d_inode(lower_dir_dentry);
+ inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+ dget(lower_dentry); // don't even try to make the lower negative
+ if (lower_dentry->d_parent != lower_dir_dentry)
+ rc = -EINVAL;
+ else if (d_unhashed(lower_dentry))
+ rc = -EINVAL;
+ else
+ rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -142,10 +149,11 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
fsstack_copy_attr_times(dir, lower_dir_inode);
set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
inode->i_ctime = dir->i_ctime;
- d_drop(dentry);
out_unlock:
- unlock_dir(lower_dir_dentry);
dput(lower_dentry);
+ inode_unlock(lower_dir_inode);
+ if (!rc)
+ d_drop(dentry);
return rc;
}
@@ -311,9 +319,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
struct dentry *lower_dentry)
{
- struct inode *inode, *lower_inode = d_inode(lower_dentry);
+ struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+ struct inode *inode, *lower_inode;
struct ecryptfs_dentry_info *dentry_info;
- struct vfsmount *lower_mnt;
int rc = 0;
dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
@@ -322,16 +330,23 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
return ERR_PTR(-ENOMEM);
}
- lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
fsstack_copy_attr_atime(d_inode(dentry->d_parent),
- d_inode(lower_dentry->d_parent));
+ d_inode(path->dentry));
BUG_ON(!d_count(lower_dentry));
ecryptfs_set_dentry_private(dentry, dentry_info);
- dentry_info->lower_path.mnt = lower_mnt;
+ dentry_info->lower_path.mnt = mntget(path->mnt);
dentry_info->lower_path.dentry = lower_dentry;
- if (d_really_is_negative(lower_dentry)) {
+ /*
+ * negative dentry can go positive under us here - its parent is not
+ * locked. That's OK and that could happen just as we return from
+ * ecryptfs_lookup() anyway. Just need to be careful and fetch
+ * ->d_inode only once - it's not stable here.
+ */
+ lower_inode = READ_ONCE(lower_dentry->d_inode);
+
+ if (!lower_inode) {
/* We want to add because we couldn't find in lower */
d_add(dentry, NULL);
return NULL;
@@ -512,22 +527,30 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct dentry *lower_dentry;
struct dentry *lower_dir_dentry;
+ struct inode *lower_dir_inode;
int rc;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
- dget(dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- dget(lower_dentry);
- rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
- dput(lower_dentry);
- if (!rc && d_really_is_positive(dentry))
+ lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+ lower_dir_inode = d_inode(lower_dir_dentry);
+
+ inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+ dget(lower_dentry); // don't even try to make the lower negative
+ if (lower_dentry->d_parent != lower_dir_dentry)
+ rc = -EINVAL;
+ else if (d_unhashed(lower_dentry))
+ rc = -EINVAL;
+ else
+ rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+ if (!rc) {
clear_nlink(d_inode(dentry));
- fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
- set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
- unlock_dir(lower_dir_dentry);
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ set_nlink(dir, lower_dir_inode->i_nlink);
+ }
+ dput(lower_dentry);
+ inode_unlock(lower_dir_inode);
if (!rc)
d_drop(dentry);
- dput(dentry);
return rc;
}
@@ -565,20 +588,22 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_new_dentry;
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
- struct dentry *trap = NULL;
+ struct dentry *trap;
struct inode *target_inode;
if (flags)
return -EINVAL;
+ lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent);
+ lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent);
+
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
- dget(lower_old_dentry);
- dget(lower_new_dentry);
- lower_old_dir_dentry = dget_parent(lower_old_dentry);
- lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
target_inode = d_inode(new_dentry);
+
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ dget(lower_new_dentry);
rc = -EINVAL;
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
goto out_lock;
@@ -606,11 +631,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
out_lock:
- unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
- dput(lower_new_dir_dentry);
- dput(lower_old_dir_dentry);
dput(lower_new_dentry);
- dput(lower_old_dentry);
+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
return rc;
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 09bc68708d28..2dd55b172d57 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
* inode is actually connected to the parent.
*/
err = exportfs_get_name(mnt, target_dir, nbuf, result);
- if (!err) {
- inode_lock(target_dir->d_inode);
- nresult = lookup_one_len(nbuf, target_dir,
- strlen(nbuf));
- inode_unlock(target_dir->d_inode);
- if (!IS_ERR(nresult)) {
- if (nresult->d_inode) {
- dput(result);
- result = nresult;
- } else
- dput(nresult);
- }
+ if (err) {
+ dput(target_dir);
+ goto err_result;
}
+ inode_lock(target_dir->d_inode);
+ nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
+ if (!IS_ERR(nresult)) {
+ if (unlikely(nresult->d_inode != result->d_inode)) {
+ dput(nresult);
+ nresult = ERR_PTR(-ESTALE);
+ }
+ }
+ inode_unlock(target_dir->d_inode);
/*
* At this point we are done with the parent, but it's pinned
* by the child dentry anyway.
*/
dput(target_dir);
+ if (IS_ERR(nresult)) {
+ err = PTR_ERR(nresult);
+ goto err_result;
+ }
+ dput(result);
+ result = nresult;
+
/*
* And finally make sure the dentry is actually acceptable
* to NFSD.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f9a38998f2fc..2c819c3c855d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -326,6 +326,7 @@ struct io_kiocb {
#define REQ_F_TIMEOUT 1024 /* timeout request */
#define REQ_F_ISREG 2048 /* regular file */
#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
+#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
u64 user_data;
u32 result;
u32 sequence;
@@ -453,9 +454,13 @@ static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
struct io_kiocb *req;
req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
- if (req && !__io_sequence_defer(ctx, req)) {
- list_del_init(&req->list);
- return req;
+ if (req) {
+ if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+ return NULL;
+ if (!__io_sequence_defer(ctx, req)) {
+ list_del_init(&req->list);
+ return req;
+ }
}
return NULL;
@@ -1225,7 +1230,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
}
}
- return 0;
+ return len;
}
static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
@@ -1941,18 +1946,24 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
+ req->flags |= REQ_F_TIMEOUT;
+
/*
* sqe->off holds how many events that need to occur for this
- * timeout event to be satisfied.
+ * timeout event to be satisfied. If it isn't set, then this is
+	 * a pure timeout request; the sequence isn't used.
*/
count = READ_ONCE(sqe->off);
- if (!count)
- count = 1;
+ if (!count) {
+ req->flags |= REQ_F_TIMEOUT_NOSEQ;
+ spin_lock_irq(&ctx->completion_lock);
+ entry = ctx->timeout_list.prev;
+ goto add;
+ }
req->sequence = ctx->cached_sq_head + count - 1;
/* reuse it to store the count */
req->submit.sequence = count;
- req->flags |= REQ_F_TIMEOUT;
/*
* Insertion sort, ensuring the first entry in the list is always
@@ -1964,6 +1975,9 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
unsigned nxt_sq_head;
long long tmp, tmp_nxt;
+ if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+ continue;
+
/*
* Since cached_sq_head + count - 1 can overflow, use type long
* long to store it.
@@ -1990,6 +2004,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
nxt->sequence++;
}
req->sequence -= span;
+add:
list_add(&req->list, entry);
spin_unlock_irq(&ctx->completion_lock);
@@ -2283,6 +2298,7 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
switch (op) {
case IORING_OP_NOP:
case IORING_OP_POLL_REMOVE:
+ case IORING_OP_TIMEOUT:
return false;
default:
return true;
diff --git a/fs/namespace.c b/fs/namespace.c
index fe0e9e1410fe..2adfe7b166a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2478,8 +2478,10 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
time64_to_tm(sb->s_time_max, 0, &tm);
- pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n",
- sb->s_type->name, mntpath,
+ pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
+ sb->s_type->name,
+ is_mounted(mnt) ? "remounted" : "mounted",
+ mntpath,
tm.tm_year+1900, (unsigned long long)sb->s_time_max);
free_page((unsigned long)buf);
@@ -2764,14 +2766,11 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
- if (error < 0) {
- mntput(mnt);
- return error;
- }
-
mnt_warn_timestamp_expiry(mountpoint, mnt);
+ error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
+ if (error < 0)
+ mntput(mnt);
return error;
}
diff --git a/include/dt-bindings/pmu/exynos_ppmu.h b/include/dt-bindings/pmu/exynos_ppmu.h
new file mode 100644
index 000000000000..8724abe130f3
--- /dev/null
+++ b/include/dt-bindings/pmu/exynos_ppmu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Samsung Exynos PPMU event types for counting in regs
+ *
+ * Copyright (c) 2019, Samsung Electronics
+ * Author: Lukasz Luba <l.luba@partner.samsung.com>
+ */
+
+#ifndef __DT_BINDINGS_PMU_EXYNOS_PPMU_H
+#define __DT_BINDINGS_PMU_EXYNOS_PPMU_H
+
+#define PPMU_RO_BUSY_CYCLE_CNT 0x0
+#define PPMU_WO_BUSY_CYCLE_CNT 0x1
+#define PPMU_RW_BUSY_CYCLE_CNT 0x2
+#define PPMU_RO_REQUEST_CNT 0x3
+#define PPMU_WO_REQUEST_CNT 0x4
+#define PPMU_RO_DATA_CNT 0x5
+#define PPMU_WO_DATA_CNT 0x6
+#define PPMU_RO_LATENCY 0x12
+#define PPMU_WO_LATENCY 0x16
+#define PPMU_V2_RO_DATA_CNT 0x4
+#define PPMU_V2_WO_DATA_CNT 0x5
+#define PPMU_V2_EVT3_RW_DATA_CNT 0x22
+
+#endif
diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 8339071ab08b..e20a0cd09ba5 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -65,5 +65,6 @@ extern void can_rx_unregister(struct net *net, struct net_device *dev,
void *data);
extern int can_send(struct sk_buff *skb, int loop);
+void can_sock_destruct(struct sock *sk);
#endif /* !_CAN_CORE_H */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d0633ebdaa9c..1ca2baf817ed 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -59,6 +59,11 @@ extern ssize_t cpu_show_l1tf(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_mds(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+extern ssize_t cpu_show_itlb_multihit(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -179,7 +184,12 @@ void arch_cpu_idle_dead(void);
int cpu_report_state(int cpu);
int cpu_check_up_prepare(int cpu);
void cpu_set_state_online(int cpu);
-void play_idle(unsigned long duration_us);
+void play_idle_precise(u64 duration_ns, u64 latency_ns);
+
+static inline void play_idle(unsigned long duration_us)
+{
+ play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX);
+}
#ifdef CONFIG_HOTPLUG_CPU
bool cpu_wait_death(unsigned int cpu, int seconds);
@@ -213,28 +223,7 @@ static inline int cpuhp_smt_enable(void) { return 0; }
static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
#endif
-/*
- * These are used for a global "mitigations=" cmdline option for toggling
- * optional CPU mitigations.
- */
-enum cpu_mitigations {
- CPU_MITIGATIONS_OFF,
- CPU_MITIGATIONS_AUTO,
- CPU_MITIGATIONS_AUTO_NOSMT,
-};
-
-extern enum cpu_mitigations cpu_mitigations;
-
-/* mitigations=off */
-static inline bool cpu_mitigations_off(void)
-{
- return cpu_mitigations == CPU_MITIGATIONS_OFF;
-}
-
-/* mitigations=auto,nosmt */
-static inline bool cpu_mitigations_auto_nosmt(void)
-{
- return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
-}
+extern bool cpu_mitigations_off(void);
+extern bool cpu_mitigations_auto_nosmt(void);
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4b6b5bea8f79..2dbe46b7c213 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -29,10 +29,13 @@ struct cpuidle_driver;
* CPUIDLE DEVICE INTERFACE *
****************************/
+#define CPUIDLE_STATE_DISABLED_BY_USER BIT(0)
+#define CPUIDLE_STATE_DISABLED_BY_DRIVER BIT(1)
+
struct cpuidle_state_usage {
unsigned long long disable;
unsigned long long usage;
- unsigned long long time; /* in US */
+ u64 time_ns;
unsigned long long above; /* Number of times it's been too deep */
unsigned long long below; /* Number of times it's been too shallow */
#ifdef CONFIG_SUSPEND
@@ -45,6 +48,8 @@ struct cpuidle_state {
char name[CPUIDLE_NAME_LEN];
char desc[CPUIDLE_DESC_LEN];
+ u64 exit_latency_ns;
+ u64 target_residency_ns;
unsigned int flags;
unsigned int exit_latency; /* in US */
int power_usage; /* in mW */
@@ -80,14 +85,14 @@ struct cpuidle_driver_kobj;
struct cpuidle_device {
unsigned int registered:1;
unsigned int enabled:1;
- unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1;
unsigned int cpu;
ktime_t next_hrtimer;
int last_state_idx;
- int last_residency;
+ u64 last_residency_ns;
u64 poll_limit_ns;
+ u64 forced_idle_latency_limit_ns;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_driver_kobj *kobj_driver;
@@ -144,6 +149,8 @@ extern int cpuidle_register_driver(struct cpuidle_driver *drv);
extern struct cpuidle_driver *cpuidle_get_driver(void);
extern struct cpuidle_driver *cpuidle_driver_ref(void);
extern void cpuidle_driver_unref(void);
+extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
+ bool disable);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
extern int cpuidle_register_device(struct cpuidle_device *dev);
extern void cpuidle_unregister_device(struct cpuidle_device *dev);
@@ -181,6 +188,8 @@ static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; }
static inline void cpuidle_driver_unref(void) {}
+static inline void cpuidle_driver_state_disabled(struct cpuidle_driver *drv,
+ int idx, bool disable) { }
static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
static inline int cpuidle_register_device(struct cpuidle_device *dev)
{return -ENODEV; }
@@ -204,18 +213,20 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
#ifdef CONFIG_CPU_IDLE
extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ u64 latency_limit_ns);
extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
-extern void cpuidle_use_deepest_state(bool enable);
+extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
#else
static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ u64 latency_limit_ns)
{return -ENODEV; }
static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return -ENODEV; }
-static inline void cpuidle_use_deepest_state(bool enable)
+static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
}
#endif
@@ -260,7 +271,7 @@ struct cpuidle_governor {
#ifdef CONFIG_CPU_IDLE
extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-extern int cpuidle_governor_latency_req(unsigned int cpu);
+extern s64 cpuidle_governor_latency_req(unsigned int cpu);
#else
static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ed11ef594378..6d8bf4bdf240 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -336,7 +336,8 @@ enum {
#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
-#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
#define QI_DEV_IOTLB_SIZE 1
#define QI_DEV_IOTLB_MAX_INVS 32
@@ -360,7 +361,8 @@ enum {
#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
-#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
#define QI_DEV_EIOTLB_MAX_INVS 32
/* Page group response descriptor QW0 */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 719fc3e15ea4..d41c521a39da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@ -1382,4 +1383,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr);
+
#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0ebb105eb261..4c75dae8dd29 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -119,6 +119,7 @@ extern struct memory_block *find_memory_block(struct mem_section *);
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
+extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 4c441be03079..e057d1fa2469 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -637,6 +637,7 @@ extern void dev_pm_put_subsys_data(struct device *dev);
* struct dev_pm_domain - power management domain representation.
*
* @ops: Power management operations associated with this domain.
+ * @start: Called when a user needs to start the device via the domain.
* @detach: Called when removing a device from the domain.
* @activate: Called before executing probe routines for bus types and drivers.
* @sync: Called after successful driver probe.
@@ -648,6 +649,7 @@ extern void dev_pm_put_subsys_data(struct device *dev);
*/
struct dev_pm_domain {
struct dev_pm_ops ops;
+ int (*start)(struct device *dev);
void (*detach)(struct device *dev, bool power_off);
int (*activate)(struct device *dev);
void (*sync)(struct device *dev);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index baf02ff91a31..5a31c711b896 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -366,6 +366,7 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
struct device *dev_pm_domain_attach_by_name(struct device *dev,
const char *name);
void dev_pm_domain_detach(struct device *dev, bool power_off);
+int dev_pm_domain_start(struct device *dev);
void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
#else
static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
@@ -383,6 +384,10 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
return NULL;
}
static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
+static inline int dev_pm_domain_start(struct device *dev)
+{
+ return 0;
+}
static inline void dev_pm_domain_set(struct device *dev,
struct dev_pm_domain *pd) {}
#endif
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index b8197ab014f2..747861816f4f 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -22,6 +22,7 @@ struct opp_table;
enum dev_pm_opp_event {
OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
+ OPP_EVENT_ADJUST_VOLTAGE,
};
/**
@@ -113,6 +114,10 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq,
void dev_pm_opp_remove(struct device *dev, unsigned long freq);
void dev_pm_opp_remove_all_dynamic(struct device *dev);
+int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
+ unsigned long u_volt, unsigned long u_volt_min,
+ unsigned long u_volt_max);
+
int dev_pm_opp_enable(struct device *dev, unsigned long freq);
int dev_pm_opp_disable(struct device *dev, unsigned long freq);
@@ -242,6 +247,14 @@ static inline void dev_pm_opp_remove_all_dynamic(struct device *dev)
{
}
+static inline int
+dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
+ unsigned long u_volt, unsigned long u_volt_min,
+ unsigned long u_volt_max)
+{
+ return 0;
+}
+
static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
{
return 0;
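
/*
 * Illustrative sketch, not part of the patch: adjusting the voltage of an
 * existing OPP with the new dev_pm_opp_adjust_voltage() helper. The 800 MHz
 * frequency and the microvolt values are made-up example numbers.
 */
static int example_retune_opp(struct device *dev)
{
	/* target 1.10 V, allowed range 1.05 V .. 1.15 V */
	return dev_pm_opp_adjust_voltage(dev, 800000000,
					 1100000, 1050000, 1150000);
}
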
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 23e4b65ec9df..2116c88663a1 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -38,7 +38,8 @@ struct devlink {
struct device *dev;
possible_net_t _net;
struct mutex lock;
- bool reload_failed;
+ u8 reload_failed:1,
+ reload_enabled:1;
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -774,6 +775,8 @@ struct ib_device;
struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
int devlink_register(struct devlink *devlink, struct device *dev);
void devlink_unregister(struct devlink *devlink);
+void devlink_reload_enable(struct devlink *devlink);
+void devlink_reload_disable(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 2bc9960a31aa..cf97f6339acb 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n",
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
__entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
show_tcp_state_name(__entry->state))
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 580b7a2e40e1..a8a2174db030 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -421,6 +421,7 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */
+ DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, /* u64 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 59e89a1bc3bb..9dc9d0079e98 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -31,13 +31,16 @@
#define PTP_ENABLE_FEATURE (1<<0)
#define PTP_RISING_EDGE (1<<1)
#define PTP_FALLING_EDGE (1<<2)
+#define PTP_STRICT_FLAGS (1<<3)
+#define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE)
/*
* flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
*/
#define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \
PTP_RISING_EDGE | \
- PTP_FALLING_EDGE)
+ PTP_FALLING_EDGE | \
+ PTP_STRICT_FLAGS)
/*
* flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1f31c2f1e6fc..4508d5e0cf69 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d->d_sb->s_dev;
watch->ino = d_backing_inode(d)->i_ino;
}
+ inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 080561bb8a4b..ef4242e5d4bc 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc)
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
- fc->root = nsdentry;
if (IS_ERR(nsdentry)) {
- ret = PTR_ERR(nsdentry);
deactivate_locked_super(sb);
+ ret = PTR_ERR(nsdentry);
+ nsdentry = NULL;
}
+ fc->root = nsdentry;
}
if (!ctx->kfc.new_sb_created)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fc28e17940e0..e2cad3ee2ead 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void)
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
}
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+ CPU_MITIGATIONS_OFF,
+ CPU_MITIGATIONS_AUTO,
+ CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+ CPU_MITIGATIONS_AUTO;
static int __init mitigations_parse_cmdline(char *arg)
{
@@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg)
return 0;
}
early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
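
/*
 * Illustrative sketch, not part of the patch: arch code consulting the
 * now-exported helpers when selecting an optional mitigation.
 * example_enable_mitigation() is a hypothetical placeholder.
 */
static void example_select_mitigation(void)
{
	if (cpu_mitigations_off())
		return;		/* booted with "mitigations=off" */

	if (cpu_mitigations_auto_nosmt())
		pr_warn("disable SMT for full protection\n");

	example_enable_mitigation();
}
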
diff --git a/kernel/events/core.c b/kernel/events/core.c
index aec8dba2bea4..00a014670ed0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1031,7 +1031,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
{
}
-void
+static inline void
perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
@@ -10535,6 +10535,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_ns;
}
+ /*
+ * Disallow uncore-cgroup events, they don't make sense as the cgroup will
+ * be different on other CPUs in the uncore mask.
+ */
+ if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
+ err = -EINVAL;
+ goto err_pmu;
+ }
+
if (event->attr.aux_output &&
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
err = -EOPNOTSUPP;
@@ -11323,8 +11332,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
int err;
/*
- * Get the target context (task or percpu):
+ * Grouping is not supported for kernel events, neither is 'AUX',
+ * make sure the caller's intentions are adjusted.
*/
+ if (attr->aux_output)
+ return ERR_PTR(-EINVAL);
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
@@ -11336,6 +11348,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;
+ /*
+ * Get the target context (task or percpu):
+ */
ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
@@ -11787,7 +11802,7 @@ inherit_event(struct perf_event *parent_event,
GFP_KERNEL);
if (!child_ctx->task_ctx_data) {
free_event(child_event);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
}
@@ -11890,7 +11905,7 @@ static int inherit_group(struct perf_event *parent_event,
if (IS_ERR(child_ctr))
return PTR_ERR(child_ctr);
- if (sub->aux_event == parent_event &&
+ if (sub->aux_event == parent_event && child_ctr &&
!perf_get_aux_event(child_ctr, leader))
return -EINVAL;
}
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 04e83fdfbe80..a45cba7df0ae 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -814,6 +814,8 @@ EXPORT_SYMBOL_GPL(freq_qos_update_request);
*/
int freq_qos_remove_request(struct freq_qos_request *req)
{
+ int ret;
+
if (!req)
return -EINVAL;
@@ -821,7 +823,11 @@ int freq_qos_remove_request(struct freq_qos_request *req)
"%s() called for unknown object\n", __func__))
return -EINVAL;
- return freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+ ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+ req->qos = NULL;
+ req->type = 0;
+
+ return ret;
}
EXPORT_SYMBOL_GPL(freq_qos_remove_request);
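
/*
 * Illustrative sketch, not part of the patch: with the request fields now
 * cleared on removal, the same freq_qos_request object can be added again.
 * The constraint set and request are assumed to come from the caller; the
 * FREQ_QOS_MAX default value is used for the re-added request.
 */
static int example_recycle_request(struct freq_constraints *qos,
				   struct freq_qos_request *req)
{
	freq_qos_remove_request(req);	/* req->qos and req->type are reset */
	return freq_qos_add_request(qos, req, FREQ_QOS_MAX,
				    FREQ_QOS_MAX_DEFAULT_VALUE);
}
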
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 83105874f255..26b9168321e7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -734,8 +734,15 @@ zone_found:
* We have found the zone. Now walk the radix tree to find the leaf node
* for our PFN.
*/
+
+ /*
+ * If the zone we wish to scan is the current zone and the
+ * pfn falls into the current node then we do not need to walk
+ * the tree.
+ */
node = bm->cur.node;
- if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
+ if (zone == bm->cur.zone &&
+ ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
goto node_found;
node = zone->rtree;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f2eb3629070..44123b4d14e8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1065,7 +1065,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
* affecting a valid clamp bucket, the next time it's enqueued,
* it will already see the updated clamp bucket value.
*/
- if (!p->uclamp[clamp_id].active) {
+ if (p->uclamp[clamp_id].active) {
uclamp_rq_dec_id(rq, p, clamp_id);
uclamp_rq_inc_id(rq, p, clamp_id);
}
@@ -6019,10 +6019,11 @@ void init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
+ __sched_fork(0, idle);
+
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);
- __sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22a2fed29054..69a81a5709ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7548,6 +7548,19 @@ static void update_blocked_averages(int cpu)
update_rq_clock(rq);
/*
+ * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+ * that RT, DL and IRQ signals have been updated before updating CFS.
+ */
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
+ update_irq_load_avg(rq, 0);
+
+ /* Don't need periodic decay once load/util_avg are null */
+ if (others_have_blocked(rq))
+ done = false;
+
+ /*
* Iterates the task_group tree in a bottom up fashion, see
* list_add_leaf_cfs_rq() for details.
*/
@@ -7574,14 +7587,6 @@ static void update_blocked_averages(int cpu)
done = false;
}
- curr_class = rq->curr->sched_class;
- update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
- update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
- update_irq_load_avg(rq, 0);
- /* Don't need periodic decay once load/util_avg are null */
- if (others_have_blocked(rq))
- done = false;
-
update_blocked_load_status(rq, !done);
rq_unlock_irqrestore(rq, &rf);
}
@@ -7642,12 +7647,18 @@ static inline void update_blocked_averages(int cpu)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
+ /*
+ * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+ * that RT, DL and IRQ signals have been updated before updating CFS.
+ */
curr_class = rq->curr->sched_class;
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
+
+ update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
+
update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
rq_unlock_irqrestore(rq, &rf);
}
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f65ef1e2f204..80167b1bd98a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* update no idle residency and return.
*/
if (current_clr_polling_and_test()) {
- dev->last_residency = 0;
+ dev->last_residency_ns = 0;
local_irq_enable();
return -EBUSY;
}
@@ -165,7 +165,9 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens.
*/
- if (idle_should_enter_s2idle() || dev->use_deepest_state) {
+ if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) {
+ u64 max_latency_ns;
+
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
@@ -176,12 +178,16 @@ static void cpuidle_idle_call(void)
}
rcu_idle_exit();
+
+ max_latency_ns = U64_MAX;
+ } else {
+ max_latency_ns = dev->forced_idle_latency_limit_ns;
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
- next_state = cpuidle_find_deepest_state(drv, dev);
+ next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
@@ -311,7 +317,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-void play_idle(unsigned long duration_us)
+void play_idle_precise(u64 duration_ns, u64 latency_ns)
{
struct idle_timer it;
@@ -323,29 +329,29 @@ void play_idle(unsigned long duration_us)
WARN_ON_ONCE(current->nr_cpus_allowed != 1);
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
- WARN_ON_ONCE(!duration_us);
+ WARN_ON_ONCE(!duration_ns);
rcu_sleep_check();
preempt_disable();
current->flags |= PF_IDLE;
- cpuidle_use_deepest_state(true);
+ cpuidle_use_deepest_state(latency_ns);
it.done = 0;
hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
it.timer.function = idle_inject_timer_fn;
- hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC),
+ hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
HRTIMER_MODE_REL_PINNED);
while (!READ_ONCE(it.done))
do_idle();
- cpuidle_use_deepest_state(false);
+ cpuidle_use_deepest_state(0);
current->flags &= ~PF_IDLE;
preempt_fold_need_resched();
preempt_enable();
}
-EXPORT_SYMBOL_GPL(play_idle);
+EXPORT_SYMBOL_GPL(play_idle_precise);
void cpu_startup_entry(enum cpuhp_state state)
{
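
/*
 * Illustrative sketch, not part of the patch: calling the renamed
 * idle-injection interface directly - one millisecond of forced idle with no
 * extra exit-latency constraint. A wrapper keeping the old
 * play_idle(duration_us) signature would reduce to a call of this form.
 */
static void example_inject_idle(void)
{
	play_idle_precise(1 * NSEC_PER_MSEC, U64_MAX);
}
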
diff --git a/kernel/signal.c b/kernel/signal.c
index c4da1ef56fdf..bcd46f547db3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
preempt_disable();
read_unlock(&tasklist_lock);
- preempt_enable_no_resched();
cgroup_enter_frozen();
+ preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
} else {
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 65eb796610dc..069ca78fb0bf 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -771,7 +771,7 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
/* fill PPS status fields */
pps_fill_timex(txc);
- txc->time.tv_sec = (time_t)ts->tv_sec;
+ txc->time.tv_sec = ts->tv_sec;
txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index 08c3c8049998..156f26fdc4c9 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -1146,6 +1146,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
if (DEC_IS_DYNALLOC(s->dict.mode)) {
if (s->dict.allocated < s->dict.size) {
+ s->dict.allocated = s->dict.size;
vfree(s->dict.buf);
s->dict.buf = vmalloc(s->dict.size);
if (s->dict.buf == NULL) {
diff --git a/mm/debug.c b/mm/debug.c
index 8345bb6e4769..0461df1207cb 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -67,28 +67,31 @@ void __dump_page(struct page *page, const char *reason)
*/
mapcount = PageSlab(page) ? 0 : page_mapcount(page);
- pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx",
- page, page_ref_count(page), mapcount,
- page->mapping, page_to_pgoff(page));
if (PageCompound(page))
- pr_cont(" compound_mapcount: %d", compound_mapcount(page));
- pr_cont("\n");
- if (PageAnon(page))
- pr_warn("anon ");
- else if (PageKsm(page))
- pr_warn("ksm ");
+ pr_warn("page:%px refcount:%d mapcount:%d mapping:%px "
+ "index:%#lx compound_mapcount: %d\n",
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page),
+ compound_mapcount(page));
+ else
+ pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx\n",
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page));
+ if (PageKsm(page))
+ pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags);
+ else if (PageAnon(page))
+ pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags);
else if (mapping) {
- pr_warn("%ps ", mapping->a_ops);
if (mapping->host && mapping->host->i_dentry.first) {
struct dentry *dentry;
dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
- pr_warn("name:\"%pd\" ", dentry);
- }
+ pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry);
+ } else
+ pr_warn("%ps\n", mapping->a_ops);
+ pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
}
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
- pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
-
hex_only:
print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
sizeof(unsigned long), page,
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f1930fa0b445..2ac38bdc18a1 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
again:
rcu_read_lock();
h_cg = hugetlb_cgroup_from_task(current);
- if (!css_tryget_online(&h_cg->css)) {
+ if (!css_tryget(&h_cg->css)) {
rcu_read_unlock();
goto again;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f05d27b7183d..a8a57bebb5fa 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1602,17 +1602,6 @@ static void collapse_file(struct mm_struct *mm,
result = SCAN_FAIL;
goto xa_unlocked;
}
- } else if (!PageUptodate(page)) {
- xas_unlock_irq(&xas);
- wait_on_page_locked(page);
- if (!trylock_page(page)) {
- result = SCAN_PAGE_LOCK;
- goto xa_unlocked;
- }
- get_page(page);
- } else if (PageDirty(page)) {
- result = SCAN_FAIL;
- goto xa_locked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
@@ -1627,7 +1616,12 @@ static void collapse_file(struct mm_struct *mm,
* without racing with truncate.
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+ /* make sure the page is up to date */
+ if (unlikely(!PageUptodate(page))) {
+ result = SCAN_FAIL;
+ goto out_unlock;
+ }
/*
* If file was truncated then extended, or hole-punched, before
@@ -1643,6 +1637,16 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock;
}
+ if (!is_shmem && PageDirty(page)) {
+ /*
+ * khugepaged only works on read-only fd, so this
+ * page is dirty because it hasn't been flushed
+ * since first write.
+ */
+ result = SCAN_FAIL;
+ goto out_unlock;
+ }
+
if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
diff --git a/mm/madvise.c b/mm/madvise.c
index 2be9f3fdb05e..94c343b4c968 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -363,8 +363,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
ClearPageReferenced(page);
test_and_clear_page_young(page);
if (pageout) {
- if (!isolate_lru_page(page))
- list_add(&page->lru, &page_list);
+ if (!isolate_lru_page(page)) {
+ if (PageUnevictable(page))
+ putback_lru_page(page);
+ else
+ list_add(&page->lru, &page_list);
+ }
} else
deactivate_page(page);
huge_unlock:
@@ -441,8 +445,12 @@ regular_page:
ClearPageReferenced(page);
test_and_clear_page_young(page);
if (pageout) {
- if (!isolate_lru_page(page))
- list_add(&page->lru, &page_list);
+ if (!isolate_lru_page(page)) {
+ if (PageUnevictable(page))
+ putback_lru_page(page);
+ else
+ list_add(&page->lru, &page_list);
+ }
} else
deactivate_page(page);
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 37592dd7ae32..46ad252e6d6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -960,7 +960,7 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
if (unlikely(!memcg))
memcg = root_mem_cgroup;
}
- } while (!css_tryget_online(&memcg->css));
+ } while (!css_tryget(&memcg->css));
rcu_read_unlock();
return memcg;
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 07e5c67f48a8..3b62a9ff8ea0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1646,6 +1646,18 @@ static int check_cpu_on_node(pg_data_t *pgdat)
return 0;
}
+static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
+{
+ int nid = *(int *)arg;
+
+ /*
+ * If a memory block belongs to multiple nodes, the stored nid is not
+ * reliable. However, such blocks are always online (e.g., cannot get
+ * offlined) and, therefore, are still spanned by the node.
+ */
+ return mem->nid == nid ? -EEXIST : 0;
+}
+
/**
* try_offline_node
* @nid: the node ID
@@ -1658,25 +1670,24 @@ static int check_cpu_on_node(pg_data_t *pgdat)
void try_offline_node(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long start_pfn = pgdat->node_start_pfn;
- unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
- unsigned long pfn;
-
- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- unsigned long section_nr = pfn_to_section_nr(pfn);
-
- if (!present_section_nr(section_nr))
- continue;
+ int rc;
- if (pfn_to_nid(pfn) != nid)
- continue;
+ /*
+ * If the node still spans pages (especially ZONE_DEVICE), don't
+ * offline it. A node spans memory after move_pfn_range_to_zone(),
+ * e.g., after the memory block was onlined.
+ */
+ if (pgdat->node_spanned_pages)
+ return;
- /*
- * some memory sections of this node are not removed, and we
- * can't offline node now.
- */
+ /*
+ * Especially offline memory blocks might not be spanned by the
+ * node. They will get spanned by the node once they get onlined.
+ * However, they link to the node in sysfs and can get onlined later.
+ */
+ rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
+ if (rc)
return;
- }
if (check_cpu_on_node(pgdat))
return;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4ae967bcf954..e08c94170ae4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -672,7 +672,9 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
* 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
* specified.
* 0 - queue pages successfully or no misplaced page.
- * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
+ * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+ * memory range specified by nodemask and maxnode points outside
+ * your accessible address space (-EFAULT)
*/
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -1286,7 +1288,7 @@ static long do_mbind(unsigned long start, unsigned long len,
flags | MPOL_MF_INVERT, &pagelist);
if (ret < 0) {
- err = -EIO;
+ err = ret;
goto up_out;
}
@@ -1305,10 +1307,12 @@ static long do_mbind(unsigned long start, unsigned long len,
if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
err = -EIO;
- } else
- putback_movable_pages(&pagelist);
-
+ } else {
up_out:
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
+ }
+
up_write(&mm->mmap_sem);
mpol_out:
mpol_put(new);
diff --git a/mm/page_io.c b/mm/page_io.c
index 24ee600f9131..60a66a58b9bf 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -73,6 +73,7 @@ static void swap_slot_free_notify(struct page *page)
{
struct swap_info_struct *sis;
struct gendisk *disk;
+ swp_entry_t entry;
/*
* There is no guarantee that the page is in swap cache - the software
@@ -104,11 +105,10 @@ static void swap_slot_free_notify(struct page *page)
* we again wish to reclaim it.
*/
disk = sis->bdev->bd_disk;
- if (disk->fops->swap_slot_free_notify) {
- swp_entry_t entry;
+ entry.val = page_private(page);
+ if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
unsigned long offset;
- entry.val = page_private(page);
offset = swp_offset(entry);
SetPageDirty(page);
diff --git a/mm/slub.c b/mm/slub.c
index b25c807a111f..e72e802fc569 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1433,12 +1433,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void *old_tail = *tail ? *tail : *head;
int rsize;
- if (slab_want_init_on_free(s)) {
- void *p = NULL;
+ /* Head and tail of the reconstructed freelist */
+ *head = NULL;
+ *tail = NULL;
- do {
- object = next;
- next = get_freepointer(s, object);
+ do {
+ object = next;
+ next = get_freepointer(s, object);
+
+ if (slab_want_init_on_free(s)) {
/*
* Clear the object and the metadata, but don't touch
* the redzone.
@@ -1448,29 +1451,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
: 0;
memset((char *)object + s->inuse, 0,
s->size - s->inuse - rsize);
- set_freepointer(s, object, p);
- p = object;
- } while (object != old_tail);
- }
-
-/*
- * Compiler cannot detect this function can be removed if slab_free_hook()
- * evaluates to nothing. Thus, catch all relevant config debug options here.
- */
-#if defined(CONFIG_LOCKDEP) || \
- defined(CONFIG_DEBUG_KMEMLEAK) || \
- defined(CONFIG_DEBUG_OBJECTS_FREE) || \
- defined(CONFIG_KASAN)
- next = *head;
-
- /* Head and tail of the reconstructed freelist */
- *head = NULL;
- *tail = NULL;
-
- do {
- object = next;
- next = get_freepointer(s, object);
+ }
/* If object's reuse doesn't have to be delayed */
if (!slab_free_hook(s, object)) {
/* Move object to the new freelist */
@@ -1485,9 +1467,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
*tail = NULL;
return *head != NULL;
-#else
- return true;
-#endif
}
static void *setup_object(struct kmem_cache *s, struct page *page,
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5518a7d9eed9..128d37a4c2e0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -86,11 +86,12 @@ static atomic_t skbcounter = ATOMIC_INIT(0);
/* af_can socket functions */
-static void can_sock_destruct(struct sock *sk)
+void can_sock_destruct(struct sock *sk)
{
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_error_queue);
}
+EXPORT_SYMBOL(can_sock_destruct);
static const struct can_proto *can_get_proto(int protocol)
{
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index def2f813ffce..137054bff9ec 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -51,6 +51,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
if (!skb)
return;
+ j1939_priv_get(priv);
can_skb_set_owner(skb, iskb->sk);
/* get a pointer to the header of the skb
@@ -104,6 +105,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
j1939_simple_recv(priv, skb);
j1939_sk_recv(priv, skb);
done:
+ j1939_priv_put(priv);
kfree_skb(skb);
}
@@ -150,6 +152,10 @@ static void __j1939_priv_release(struct kref *kref)
netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+ WARN_ON_ONCE(!list_empty(&priv->active_session_list));
+ WARN_ON_ONCE(!list_empty(&priv->ecus));
+ WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
+
dev_put(ndev);
kfree(priv);
}
@@ -207,6 +213,9 @@ static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
{
struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ if (!can_ml_priv)
+ return NULL;
+
return can_ml_priv->j1939_priv;
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 4d8ba701e15d..de09b0a65791 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -78,7 +78,6 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
{
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);
- jsk->priv = priv;
spin_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
@@ -91,7 +90,6 @@ static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
list_del_init(&jsk->list);
spin_unlock_bh(&priv->j1939_socks_lock);
- jsk->priv = NULL;
j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
}
@@ -349,6 +347,34 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
spin_unlock_bh(&priv->j1939_socks_lock);
}
+static void j1939_sk_sock_destruct(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* This function will be called by the generic networking code, when
+ * the socket is ultimately closed (sk->sk_destruct).
+ *
+ * The race between
+ * - processing a received CAN frame
+ * (can_receive -> j1939_can_recv)
+ * and accessing j1939_priv
+ * ... and ...
+ * - closing a socket
+ * (j1939_can_rx_unregister -> can_rx_unregister)
+ * and calling the final j1939_priv_put()
+ *
+ * is avoided by calling the final j1939_priv_put() from this
+ * RCU deferred cleanup call.
+ */
+ if (jsk->priv) {
+ j1939_priv_put(jsk->priv);
+ jsk->priv = NULL;
+ }
+
+ /* call generic CAN sock destruct */
+ can_sock_destruct(sk);
+}
+
static int j1939_sk_init(struct sock *sk)
{
struct j1939_sock *jsk = j1939_sk(sk);
@@ -371,6 +397,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ sk->sk_destruct = j1939_sk_sock_destruct;
return 0;
}
@@ -443,6 +470,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
jsk->ifindex = addr->can_ifindex;
+
+ /* the corresponding j1939_priv_put() is called via
+ * sk->sk_destruct, which points to j1939_sk_sock_destruct()
+ */
+ j1939_priv_get(priv);
+ jsk->priv = priv;
}
/* set default transmit pgn */
@@ -560,8 +593,8 @@ static int j1939_sk_release(struct socket *sock)
if (!sk)
return 0;
- jsk = j1939_sk(sk);
lock_sock(sk);
+ jsk = j1939_sk(sk);
if (jsk->state & J1939_SOCK_BOUND) {
struct j1939_priv *priv = jsk->priv;
@@ -1059,51 +1092,72 @@ static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct j1939_sock *jsk = j1939_sk(sk);
- struct j1939_priv *priv = jsk->priv;
+ struct j1939_priv *priv;
int ifindex;
int ret;
+ lock_sock(sock->sk);
/* various socket state tests */
- if (!(jsk->state & J1939_SOCK_BOUND))
- return -EBADFD;
+ if (!(jsk->state & J1939_SOCK_BOUND)) {
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
+ priv = jsk->priv;
ifindex = jsk->ifindex;
- if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+ if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) {
/* no source address assigned yet */
- return -EBADFD;
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
/* deal with provided destination address info */
if (msg->msg_name) {
struct sockaddr_can *addr = msg->msg_name;
- if (msg->msg_namelen < J1939_MIN_NAMELEN)
- return -EINVAL;
+ if (msg->msg_namelen < J1939_MIN_NAMELEN) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
- if (addr->can_family != AF_CAN)
- return -EINVAL;
+ if (addr->can_family != AF_CAN) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
- if (addr->can_ifindex && addr->can_ifindex != ifindex)
- return -EBADFD;
+ if (addr->can_ifindex && addr->can_ifindex != ifindex) {
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
- !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
- return -EINVAL;
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
if (!addr->can_addr.j1939.name &&
addr->can_addr.j1939.addr == J1939_NO_ADDR &&
- !sock_flag(sk, SOCK_BROADCAST))
+ !sock_flag(sk, SOCK_BROADCAST)) {
/* broadcast, but SO_BROADCAST not set */
- return -EACCES;
+ ret = -EACCES;
+ goto sendmsg_done;
+ }
} else {
if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
- !sock_flag(sk, SOCK_BROADCAST))
+ !sock_flag(sk, SOCK_BROADCAST)) {
/* broadcast, but SO_BROADCAST not set */
- return -EACCES;
+ ret = -EACCES;
+ goto sendmsg_done;
+ }
}
ret = j1939_sk_send_loop(priv, sk, msg, size);
+sendmsg_done:
+ release_sock(sock->sk);
+
return ret;
}
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index e5f1a56994c6..9f99af5b0b11 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -255,6 +255,7 @@ static void __j1939_session_drop(struct j1939_session *session)
return;
j1939_sock_pending_del(session->sk);
+ sock_put(session->sk);
}
static void j1939_session_destroy(struct j1939_session *session)
@@ -266,6 +267,9 @@ static void j1939_session_destroy(struct j1939_session *session)
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+ WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
+ WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
+
skb_queue_purge(&session->skb_queue);
__j1939_session_drop(session);
j1939_priv_put(session->priv);
@@ -1042,12 +1046,13 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
j1939_sk_queue_activate_next(session);
}
-static void j1939_session_cancel(struct j1939_session *session,
+static void __j1939_session_cancel(struct j1939_session *session,
enum j1939_xtp_abort err)
{
struct j1939_priv *priv = session->priv;
WARN_ON_ONCE(!err);
+ lockdep_assert_held(&session->priv->active_session_list_lock);
session->err = j1939_xtp_abort_to_errno(priv, err);
/* do not send aborts on incoming broadcasts */
@@ -1062,6 +1067,20 @@ static void j1939_session_cancel(struct j1939_session *session,
j1939_sk_send_loop_abort(session->sk, session->err);
}
+static void j1939_session_cancel(struct j1939_session *session,
+ enum j1939_xtp_abort err)
+{
+ j1939_session_list_lock(session->priv);
+
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_WAITING_ABORT) {
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ __j1939_session_cancel(session, err);
+ }
+
+ j1939_session_list_unlock(session->priv);
+}
+
static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
{
struct j1939_session *session =
@@ -1108,8 +1127,6 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
__func__, session, ret);
if (session->skcb.addr.type != J1939_SIMPLE) {
- j1939_tp_set_rxtimeout(session,
- J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
} else {
session->err = ret;
@@ -1169,7 +1186,7 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
hrtimer_start(&session->rxtimer,
ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
HRTIMER_MODE_REL_SOFT);
- j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
}
j1939_session_list_unlock(session->priv);
}
@@ -1375,7 +1392,6 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
out_session_cancel:
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, err);
}
@@ -1572,7 +1588,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
/* RTS on active session */
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
}
@@ -1583,7 +1598,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
session->last_cmd);
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
return -EBUSY;
@@ -1785,7 +1799,6 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
out_session_cancel:
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
j1939_session_put(session);
}
@@ -1866,6 +1879,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
return ERR_PTR(-ENOMEM);
/* skb is refcounted in j1939_session_new() */
+ sock_hold(skb->sk);
session->sk = skb->sk;
session->transmission = true;
session->pkt.total = (size + 6) / 7;
@@ -2028,7 +2042,11 @@ int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
&priv->active_session_list,
active_session_list_entry) {
if (!sk || sk == session->sk) {
- j1939_session_timers_cancel(session);
+ if (hrtimer_try_to_cancel(&session->txtimer) == 1)
+ j1939_session_put(session);
+ if (hrtimer_try_to_cancel(&session->rxtimer) == 1)
+ j1939_session_put(session);
+
session->err = ESHUTDOWN;
j1939_session_deactivate_locked(session);
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f80151eeaf51..93905dc7c179 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2699,7 +2699,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
struct devlink *devlink = info->user_ptr[0];
int err;
- if (!devlink_reload_supported(devlink))
+ if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4618,6 +4618,7 @@ struct devlink_health_reporter {
bool auto_recover;
u8 health_state;
u64 dump_ts;
+ u64 dump_real_ts;
u64 error_count;
u64 recovery_count;
u64 last_recovery_ts;
@@ -4790,6 +4791,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
goto dump_err;
reporter->dump_ts = jiffies;
+ reporter->dump_real_ts = ktime_get_real_ns();
return 0;
@@ -4952,6 +4954,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
jiffies_to_msecs(reporter->dump_ts),
DEVLINK_ATTR_PAD))
goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr);
@@ -6196,6 +6202,8 @@ EXPORT_SYMBOL_GPL(devlink_register);
void devlink_unregister(struct devlink *devlink)
{
mutex_lock(&devlink_mutex);
+ WARN_ON(devlink_reload_supported(devlink) &&
+ devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
list_del(&devlink->list);
mutex_unlock(&devlink_mutex);
@@ -6203,6 +6211,41 @@ void devlink_unregister(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
+ * devlink_reload_enable - Enable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at the end of the device initialization
+ * process when the reload operation is supported.
+ */
+void devlink_reload_enable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->reload_enabled = true;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_enable);
+
+/**
+ * devlink_reload_disable - Disable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at the beginning of the device cleanup
+ * process when the reload operation is supported.
+ */
+void devlink_reload_disable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ /* The mutex is taken to ensure that no reload operation is in
+ * progress while the reload_enabled flag is being cleared.
+ */
+ devlink->reload_enabled = false;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_disable);
+
+/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
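
/*
 * Illustrative sketch, not part of the patch: the driver-side pairing of the
 * new enable/disable calls around devlink registration. The example_drv_*
 * wrappers are hypothetical placeholders.
 */
static int example_drv_init(struct devlink *devlink, struct device *dev)
{
	int err;

	err = devlink_register(devlink, dev);
	if (err)
		return err;

	/* ... finish device initialization ... */

	devlink_reload_enable(devlink);	/* reload requests accepted from here on */
	return 0;
}

static void example_drv_remove(struct devlink *devlink)
{
	devlink_reload_disable(devlink);	/* block new reload requests */
	/* ... tear down the device ... */
	devlink_unregister(devlink);
}
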
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 9c1cc2482b68..9e5a883a9f0c 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -106,7 +106,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
slave = ds->ports[port].slave;
err = br_vlan_get_pvid(slave, &pvid);
- if (err < 0)
+ if (!pvid || err < 0)
/* There is no pvid on the bridge for this port, which is
* perfectly valid. Nothing to restore, bye-bye!
*/
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 716d5472c022..58007439cffd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2289,7 +2289,8 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
rcu_read_unlock();
return -ENODEV;
}
- skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
if (!skb2) {
read_unlock(&mrt_lock);
rcu_read_unlock();
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9d4f75e0d33a..e70567446f28 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -81,6 +81,11 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
if (!pskb_may_pull(skb, srhoff + len))
return NULL;
+ /* note that pskb_may_pull may change pointers in header;
+ * for this reason it is necessary to reload them when needed.
+ */
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
if (!seg6_validate_srh(srh, len))
return NULL;
@@ -336,6 +341,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
@@ -365,6 +372,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
@@ -385,6 +394,8 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 233f1368162b..18c6fac6ead9 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -450,6 +450,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_qp_init_attr attr;
struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
+ unsigned long max_wrs;
int ret, fr_queue_space;
/*
@@ -469,10 +470,15 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
/* add the conn now so that connection establishment has the dev */
rds_ib_add_conn(rds_ibdev, conn);
- if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
- rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
- if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
- rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
+ max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
+ rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
+ if (ic->i_send_ring.w_nr != max_wrs)
+ rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
+
+ max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
+ rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
+ if (ic->i_recv_ring.w_nr != max_wrs)
+ rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
/* Protection domain and memory range */
ic->i_pd = rds_ibdev->pd;
@@ -1099,8 +1105,9 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_flowctl = 0;
atomic_set(&ic->i_credits, 0);
- rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
- rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+ /* Re-init rings, but retain sizes. */
+ rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
+ rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
if (ic->i_ibinc) {
rds_inc_put(&ic->i_ibinc->ii_inc);
@@ -1147,8 +1154,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
* rds_ib_conn_shutdown() waits for these to be emptied so they
* must be initialized before it can be called.
*/
- rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
- rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+ rds_ib_ring_init(&ic->i_send_ring, 0);
+ rds_ib_ring_init(&ic->i_recv_ring, 0);
ic->conn = conn;
conn->c_transport_data = ic;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 47946f489fd4..737b49909a7a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -796,6 +796,7 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_err = EPIPE;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
+ sock_put(&smc->sk); /* passive closing */
goto out;
}
@@ -1731,7 +1732,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
- if (sk->sk_state == SMC_INIT) {
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..8f35060a24e1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -34,8 +34,6 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include "core.h"
#include "name_table.h"
#include "subscr.h"
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 60d829581068..3042f654e0af 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,6 +60,12 @@
#include <linux/rhashtable.h>
#include <net/genetlink.h>
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
struct tipc_node;
struct tipc_bearer;
struct tipc_bc_base;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 9b599ed66d97..2c86a2fc3915 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -480,6 +480,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
else
XFRM_INC_STATS(net,
LINUX_MIB_XFRMINSTATEINVALID);
+
+ if (encap_type == -1)
+ dev_put(skb->dev);
goto drop;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index c6f3c4a1bd99..f3423562d933 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -495,6 +495,8 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
x->type->destructor(x);
xfrm_put_type(x->type);
}
+ if (x->xfrag.page)
+ put_page(x->xfrag.page);
xfrm_dev_state_free(x);
security_xfrm_state_free(x);
xfrm_state_free(x);
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh
index 97a2c844a95e..45e8aa360b45 100755
--- a/scripts/tools-support-relr.sh
+++ b/scripts/tools-support-relr.sh
@@ -4,13 +4,13 @@
tmp_file=$(mktemp)
trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
-cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1
+cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1
void *p = &p;
END
-"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
# Despite printing an error message, GNU nm still exits with exit code 0 if it
# sees a relr section. So we need to check that nothing is printed to stderr.
-test -z "$("$NM" $tmp_file 2>&1 >/dev/null)"
+test -z "$($NM $tmp_file 2>&1 >/dev/null)"
-"$OBJCOPY" -O binary $tmp_file $tmp_file.bin
+$OBJCOPY -O binary $tmp_file $tmp_file.bin
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d80041ea4e01..2236b5e0c1f2 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1782,11 +1782,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
struct snd_pcm_runtime *runtime;
unsigned long flags;
- if (PCM_RUNTIME_CHECK(substream))
+ if (snd_BUG_ON(!substream))
return;
- runtime = substream->runtime;
snd_pcm_stream_lock_irqsave(substream, flags);
+ if (PCM_RUNTIME_CHECK(substream))
+ goto _unlock;
+ runtime = substream->runtime;
+
if (!snd_pcm_running(substream) ||
snd_pcm_update_hw_ptr0(substream, 1) < 0)
goto _end;
@@ -1797,6 +1800,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
#endif
_end:
kill_fasync(&runtime->fasync, SIGIO, POLL_IN);
+ _unlock:
snd_pcm_stream_unlock_irqrestore(substream, flags);
}
EXPORT_SYMBOL(snd_pcm_period_elapsed);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cf53fbd872ee..c52419376c74 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2396,6 +2396,9 @@ static const struct pci_device_id azx_ids[] = {
/* CometLake-H */
{ PCI_DEVICE(0x8086, 0x06C8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* CometLake-S */
+ { PCI_DEVICE(0x8086, 0xa3f0),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
/* Icelake */
{ PCI_DEVICE(0x8086, 0x34c8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 3c720703ebb8..78bd2e3722c7 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -46,10 +46,12 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
((codec)->core.vendor_id == 0x80862800))
#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
#define is_icelake(codec) ((codec)->core.vendor_id == 0x8086280f)
+#define is_tigerlake(codec) ((codec)->core.vendor_id == 0x80862812)
#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
|| is_skylake(codec) || is_broxton(codec) \
|| is_kabylake(codec) || is_geminilake(codec) \
- || is_cannonlake(codec) || is_icelake(codec))
+ || is_cannonlake(codec) || is_icelake(codec) \
+ || is_tigerlake(codec))
#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index a2ab8e8d3a93..4a9a2f6ef5a4 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -388,6 +388,9 @@ static void snd_complete_urb(struct urb *urb)
}
prepare_outbound_urb(ep, ctx);
+ /* can be stopped during prepare callback */
+ if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags)))
+ goto exit_clear;
} else {
retire_inbound_urb(ep, ctx);
/* can be stopped during retire callback */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 3fd1d1749edf..45eee5cc312e 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1229,7 +1229,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
if (cval->min + cval->res < cval->max) {
int last_valid_res = cval->res;
int saved, test, check;
- get_cur_mix_raw(cval, minchn, &saved);
+ if (get_cur_mix_raw(cval, minchn, &saved) < 0)
+ goto no_res_check;
for (;;) {
test = saved;
if (test < cval->max)
@@ -1249,6 +1250,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
}
+no_res_check:
cval->initialized = 1;
}
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 0bbe1201a6ac..349e1e52996d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -248,8 +248,8 @@ static int create_yamaha_midi_quirk(struct snd_usb_audio *chip,
NULL, USB_MS_MIDI_OUT_JACK);
if (!injd && !outjd)
return -ENODEV;
- if (!(injd && snd_usb_validate_midi_desc(injd)) ||
- !(outjd && snd_usb_validate_midi_desc(outjd)))
+ if ((injd && !snd_usb_validate_midi_desc(injd)) ||
+ (outjd && !snd_usb_validate_midi_desc(outjd)))
return -ENODEV;
if (injd && (injd->bLength < 5 ||
(injd->bJackType != USB_MS_EMBEDDED &&
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index a5e584b60dcd..389e8657434a 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -81,9 +81,9 @@ static bool validate_processing_unit(const void *p,
switch (v->protocol) {
case UAC_VERSION_1:
default:
- /* bNrChannels, wChannelConfig, iChannelNames, bControlSize */
- len += 1 + 2 + 1 + 1;
- if (d->bLength < len) /* bControlSize */
+ /* bNrChannels, wChannelConfig, iChannelNames */
+ len += 1 + 2 + 1;
+ if (d->bLength < len + 1) /* bControlSize */
return false;
m = hdr[len];
len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */
diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/ToDo
index 6e8b89f282e6..b196a139a3e4 100644
--- a/tools/power/cpupower/ToDo
+++ b/tools/power/cpupower/ToDo
@@ -8,3 +8,17 @@ ToDos sorted by priority:
- Add another c1e debug idle monitor
-> Is by design racy with BIOS, but could be added
with a --force option and some "be careful" messages
+- Add cpu_start()/cpu_stop() callbacks for monitor
+ -> This is to move the per_cpu logic from inside the
+ monitor to outside it. This can be given higher
+ priority in fork_it.
+- Fork as many processes as there are CPUs in case the
+ per_cpu_schedule flag is set.
+ -> Bind forked process to each cpu.
+ -> Execute start measures via the forked processes on
+ each cpu.
+ -> Run test executable in a forked process.
+ -> Execute stop measures via the forked processes on
+ each cpu.
+ This would be ideal as it will not introduce noise in the
+ tested executable.
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index 4c9d342b70ff..d3755ea70d4d 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -10,6 +10,7 @@
#include <errno.h>
#include <string.h>
#include <getopt.h>
+#include <sys/utsname.h>
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
@@ -30,6 +31,7 @@ int cmd_info(int argc, char **argv)
extern char *optarg;
extern int optind, opterr, optopt;
unsigned int cpu;
+ struct utsname uts;
union {
struct {
@@ -39,6 +41,13 @@ int cmd_info(int argc, char **argv)
} params = {};
int ret = 0;
+ ret = uname(&uts);
+ if (!ret && (!strcmp(uts.machine, "ppc64le") ||
+ !strcmp(uts.machine, "ppc64"))) {
+ fprintf(stderr, _("Subcommand not supported on POWER.\n"));
+ return ret;
+ }
+
setlocale(LC_ALL, "");
textdomain(PACKAGE);
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 3cd95c6cb974..3cca6f715dd9 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -10,6 +10,7 @@
#include <errno.h>
#include <string.h>
#include <getopt.h>
+#include <sys/utsname.h>
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
@@ -31,6 +32,7 @@ int cmd_set(int argc, char **argv)
extern char *optarg;
extern int optind, opterr, optopt;
unsigned int cpu;
+ struct utsname uts;
union {
struct {
@@ -41,6 +43,13 @@ int cmd_set(int argc, char **argv)
int perf_bias = 0;
int ret = 0;
+ ret = uname(&uts);
+ if (!ret && (!strcmp(uts.machine, "ppc64le") ||
+ !strcmp(uts.machine, "ppc64"))) {
+ fprintf(stderr, _("Subcommand not supported on POWER.\n"));
+ return ret;
+ }
+
setlocale(LC_ALL, "");
textdomain(PACKAGE);
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 5cc39d4e23ed..73bfafc60e9b 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -131,6 +131,10 @@ out:
if (ext_cpuid_level >= 0x80000007 &&
(cpuid_edx(0x80000007) & (1 << 9)))
cpu_info->caps |= CPUPOWER_CAP_AMD_CBP;
+
+ if (ext_cpuid_level >= 0x80000008 &&
+ cpuid_ebx(0x80000008) & (1 << 4))
+ cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU;
}
if (cpu_info->vendor == X86_VENDOR_INTEL) {
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 357b19bb136e..c258eeccd05f 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -69,6 +69,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
#define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010
#define CPUPOWER_CAP_IS_SNB 0x00000020
#define CPUPOWER_CAP_INTEL_IDA 0x00000040
+#define CPUPOWER_CAP_AMD_RDPRU 0x00000080
#define CPUPOWER_AMD_CPBDIS 0x02000000
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 3f893b99b337..33dc34db4f3c 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -328,7 +328,7 @@ struct cpuidle_monitor amd_fam14h_monitor = {
.stop = amd_fam14h_stop,
.do_register = amd_fam14h_register,
.unregister = amd_fam14h_unregister,
- .needs_root = 1,
+ .flags.needs_root = 1,
.overflow_s = OVERFLOW_MS / 1000,
};
#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index f634aeb65c5f..3c4cee160b0e 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -207,6 +207,6 @@ struct cpuidle_monitor cpuidle_sysfs_monitor = {
.stop = cpuidle_stop,
.do_register = cpuidle_register,
.unregister = cpuidle_unregister,
- .needs_root = 0,
+ .flags.needs_root = 0,
.overflow_s = UINT_MAX,
};
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index d3c3e6e7aa26..6d44fec55ad5 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -408,7 +408,7 @@ int cmd_monitor(int argc, char **argv)
dprint("Try to register: %s\n", all_monitors[num]->name);
test_mon = all_monitors[num]->do_register();
if (test_mon) {
- if (test_mon->needs_root && !run_as_root) {
+ if (test_mon->flags.needs_root && !run_as_root) {
fprintf(stderr, _("Available monitor %s needs "
"root access\n"), test_mon->name);
continue;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index a2d901d3bfaf..5b5eb1da0cce 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -60,7 +60,10 @@ struct cpuidle_monitor {
struct cpuidle_monitor* (*do_register) (void);
void (*unregister)(void);
unsigned int overflow_s;
- int needs_root;
+ struct {
+ unsigned int needs_root:1;
+ unsigned int per_cpu_schedule:1;
+ } flags;
};
extern long long timespec_diff_us(struct timespec start, struct timespec end);
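Converting the plain needs_root integer into a flags bitfield leaves room for further per-monitor attributes (per_cpu_schedule, used by the mperf monitor further down) without growing the structure, and call sites keep using nested designated initializers. A self-contained illustration of the pattern (not code from the patch):

struct example_monitor {
	struct {
		unsigned int needs_root:1;
		unsigned int per_cpu_schedule:1;
	} flags;
};

static struct example_monitor demo = {
	.flags.needs_root = 1,		/* nested designator, as in the converted monitors */
	.flags.per_cpu_schedule = 1,
};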
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 7c7451d3f494..97ad3233a521 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -39,7 +39,6 @@ static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = {
{
.name = "PC9",
.desc = N_("Processor Package C9"),
- .desc = N_("Processor Package C2"),
.id = PC9,
.range = RANGE_PACKAGE,
.get_count_percent = hsw_ext_get_count_percent,
@@ -188,7 +187,7 @@ struct cpuidle_monitor intel_hsw_ext_monitor = {
.stop = hsw_ext_stop,
.do_register = hsw_ext_register,
.unregister = hsw_ext_unregister,
- .needs_root = 1,
+ .flags.needs_root = 1,
.overflow_s = 922000000 /* 922337203 seconds TSC overflow
at 20GHz */
};
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index 44806a6dae11..e7d48cb563c0 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -19,6 +19,10 @@
#define MSR_APERF 0xE8
#define MSR_MPERF 0xE7
+#define RDPRU ".byte 0x0f, 0x01, 0xfd"
+#define RDPRU_ECX_MPERF 0
+#define RDPRU_ECX_APERF 1
+
#define MSR_TSC 0x10
#define MSR_AMD_HWCR 0xc0010015
@@ -86,15 +90,51 @@ static int mperf_get_tsc(unsigned long long *tsc)
return ret;
}
+static int get_aperf_mperf(int cpu, unsigned long long *aval,
+ unsigned long long *mval)
+{
+ unsigned long low_a, high_a;
+ unsigned long low_m, high_m;
+ int ret;
+
+ /*
+ * Running on the cpu from which we read the registers will
+ * prevent APERF/MPERF from going out of sync because of IPI
+ * latency introduced by read_msr()s.
+ */
+ if (mperf_monitor.flags.per_cpu_schedule) {
+ if (bind_cpu(cpu))
+ return 1;
+ }
+
+ if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_RDPRU) {
+ asm volatile(RDPRU
+ : "=a" (low_a), "=d" (high_a)
+ : "c" (RDPRU_ECX_APERF));
+ asm volatile(RDPRU
+ : "=a" (low_m), "=d" (high_m)
+ : "c" (RDPRU_ECX_MPERF));
+
+ *aval = ((low_a) | (high_a) << 32);
+ *mval = ((low_m) | (high_m) << 32);
+
+ return 0;
+ }
+
+ ret = read_msr(cpu, MSR_APERF, aval);
+ ret |= read_msr(cpu, MSR_MPERF, mval);
+
+ return ret;
+}
+
static int mperf_init_stats(unsigned int cpu)
{
- unsigned long long val;
+ unsigned long long aval, mval;
int ret;
- ret = read_msr(cpu, MSR_APERF, &val);
- aperf_previous_count[cpu] = val;
- ret |= read_msr(cpu, MSR_MPERF, &val);
- mperf_previous_count[cpu] = val;
+ ret = get_aperf_mperf(cpu, &aval, &mval);
+ aperf_previous_count[cpu] = aval;
+ mperf_previous_count[cpu] = mval;
is_valid[cpu] = !ret;
return 0;
@@ -102,13 +142,12 @@ static int mperf_init_stats(unsigned int cpu)
static int mperf_measure_stats(unsigned int cpu)
{
- unsigned long long val;
+ unsigned long long aval, mval;
int ret;
- ret = read_msr(cpu, MSR_APERF, &val);
- aperf_current_count[cpu] = val;
- ret |= read_msr(cpu, MSR_MPERF, &val);
- mperf_current_count[cpu] = val;
+ ret = get_aperf_mperf(cpu, &aval, &mval);
+ aperf_current_count[cpu] = aval;
+ mperf_current_count[cpu] = mval;
is_valid[cpu] = !ret;
return 0;
@@ -305,6 +344,9 @@ struct cpuidle_monitor *mperf_register(void)
if (init_maxfreq_mode())
return NULL;
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD)
+ mperf_monitor.flags.per_cpu_schedule = 1;
+
/* Free this at program termination */
is_valid = calloc(cpu_count, sizeof(int));
mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
@@ -333,7 +375,7 @@ struct cpuidle_monitor mperf_monitor = {
.stop = mperf_stop,
.do_register = mperf_register,
.unregister = mperf_unregister,
- .needs_root = 1,
+ .flags.needs_root = 1,
.overflow_s = 922000000 /* 922337203 seconds TSC overflow
at 20GHz */
};
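The comment in get_aperf_mperf() explains the new per_cpu_schedule flag: reading APERF and MPERF from the target CPU itself avoids the IPI latency of remote read_msr() calls, which can otherwise let the two counters drift apart. The bind_cpu() helper it calls is not part of the hunks shown here; a rough stand-in based on sched_setaffinity(2) could look like the following (assumption, not the actual helper):

#define _GNU_SOURCE
#include <sched.h>

/* Hypothetical stand-in for bind_cpu(): pin the calling thread to
 * 'cpu' so the subsequent APERF/MPERF reads happen locally. */
static int bind_cpu_sketch(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	return sched_setaffinity(0, sizeof(set), &set);	/* 0 = this thread */
}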
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index be7256696a37..114271165182 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -208,7 +208,7 @@ struct cpuidle_monitor intel_nhm_monitor = {
.stop = nhm_stop,
.do_register = intel_nhm_register,
.unregister = intel_nhm_unregister,
- .needs_root = 1,
+ .flags.needs_root = 1,
.overflow_s = 922000000 /* 922337203 seconds TSC overflow
at 20GHz */
};
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index 968333571cad..df8b223cc096 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -192,7 +192,7 @@ struct cpuidle_monitor intel_snb_monitor = {
.stop = snb_stop,
.do_register = snb_register,
.unregister = snb_unregister,
- .needs_root = 1,
+ .flags.needs_root = 1,
.overflow_s = 922000000 /* 922337203 seconds TSC overflow
at 20GHz */
};
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index ae6146ec5afd..4632f51af7ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev $swp2 group 239.0.0.1
+ dev dummy1 group 239.0.0.1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with a multicast group"
@@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2
+ ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with local interface"
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 4911fc77d0f6..d1cf9f6e0e6b 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -55,7 +55,7 @@ static void test_dump_stack(void)
#pragma GCC diagnostic pop
}
-static pid_t gettid(void)
+static pid_t _gettid(void)
{
return syscall(SYS_gettid);
}
@@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
" pid=%d tid=%d - %s\n",
- file, line, exp_str, getpid(), gettid(),
+ file, line, exp_str, getpid(), _gettid(),
strerror(errno));
test_dump_stack();
if (fmt) {
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index bd4a7247b44f..c0dd10257df5 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx)
}
#endif
+static void show_flag_test(int rq_index, unsigned int flags, int err)
+{
+ printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n",
+ rq_index ? '1' + rq_index : ' ',
+ flags, err, strerror(errno));
+ /* sigh, uClibc ... */
+ errno = 0;
+}
+
+static void do_flag_test(int fd, unsigned int index)
+{
+ struct ptp_extts_request extts_request;
+ unsigned long request[2] = {
+ PTP_EXTTS_REQUEST,
+ PTP_EXTTS_REQUEST2,
+ };
+ unsigned int enable_flags[5] = {
+ PTP_ENABLE_FEATURE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1),
+ };
+ int err, i, j;
+
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 5; j++) {
+ extts_request.flags = enable_flags[j];
+ err = ioctl(fd, request[i], &extts_request);
+ show_flag_test(i, extts_request.flags, err);
+
+ extts_request.flags = 0;
+ err = ioctl(fd, request[i], &extts_request);
+ }
+ }
+}
+
static clockid_t get_clockid(int fd)
{
#define CLOCKFD 3
@@ -96,7 +136,8 @@ static void usage(char *progname)
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
" -t val shift the ptp clock time by 'val' seconds\n"
- " -T val set the ptp clock time to 'val' seconds\n",
+ " -T val set the ptp clock time to 'val' seconds\n"
+ " -z test combinations of rising/falling external time stamp flags\n",
progname);
}
@@ -122,6 +163,7 @@ int main(int argc, char *argv[])
int adjtime = 0;
int capabilities = 0;
int extts = 0;
+ int flagtest = 0;
int gettime = 0;
int index = 0;
int list_pins = 0;
@@ -138,7 +180,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -191,6 +233,9 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'z':
+ flagtest = 1;
+ break;
case 'h':
usage(progname);
return 0;
@@ -322,6 +367,10 @@ int main(int argc, char *argv[])
}
}
+ if (flagtest) {
+ do_flag_test(fd, index);
+ }
+
if (list_pins) {
int n_pins = 0;
if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
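The new -z mode cycles the flag combinations above through both PTP_EXTTS_REQUEST and the newer PTP_EXTTS_REQUEST2 ioctl, which is expected to reject invalid flag combinations rather than silently ignore them. For orientation, a minimal single request against the same UAPI (sketch; /dev/ptp0 and channel 0 are placeholder choices):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int main(void)
{
	struct ptp_extts_request req;
	int fd = open("/dev/ptp0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.index = 0;					/* external timestamp channel */
	req.flags = PTP_ENABLE_FEATURE | PTP_RISING_EDGE;

	if (ioctl(fd, PTP_EXTTS_REQUEST, &req))
		perror("PTP_EXTTS_REQUEST");

	close(fd);
	return 0;
}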
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d6f0696d98ef..13efc291b1c7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -50,6 +50,7 @@
#include <linux/bsearch.h>
#include <linux/io.h>
#include <linux/lockdep.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/ioctl.h>
@@ -121,9 +122,22 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#define KVM_COMPAT(c) .compat_ioctl = (c)
#else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ * passed to a compat task, let the ioctls fail.
+ */
static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+ return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
+ .open = kvm_no_compat_open
#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
@@ -149,10 +163,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
return 0;
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -625,6 +659,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -645,6 +696,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
+
+ refcount_set(&kvm->users_count, 1);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memslots *slots = kvm_alloc_memslots();
@@ -662,7 +719,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err_no_arch_destroy_vm;
}
- refcount_set(&kvm->users_count, 1);
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -675,13 +731,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
- if (init_srcu_struct(&kvm->srcu))
- goto out_err_no_srcu;
- if (init_srcu_struct(&kvm->irq_srcu))
- goto out_err_no_irq_srcu;
-
r = kvm_init_mmu_notifier(kvm);
if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
+ if (r)
goto out_err;
mutex_lock(&kvm_lock);
@@ -693,19 +748,24 @@ static struct kvm *kvm_create_vm(unsigned long type)
return kvm;
out_err:
- cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
- cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
hardware_disable_all();
out_err_no_disable:
kvm_arch_destroy_vm(kvm);
- WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
out_err_no_arch_destroy_vm:
+ WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+ cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -737,6 +797,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1857,7 +1919,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
SetPageDirty(page);
@@ -1867,7 +1929,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -4371,3 +4433,86 @@ void kvm_exit(void)
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}
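kvm_vm_create_worker_thread() spawns a kthread that copies its init_context off the parent's stack, attaches itself to the parent's cgroups, inherits the parent's nice value, and then parks itself; the caller unparks it when the real work should begin. Together with the weak kvm_arch_post_init_vm()/kvm_arch_pre_destroy_vm() hooks added earlier in this file, this gives an architecture a place to start and stop such a per-VM worker. A minimal sketch of a consumer (recovery_fn and the "kvm-housekeeping" name are hypothetical, not from this patch; assumes <linux/kthread.h>):

/* Hypothetical thread body: runs until the spawner calls kthread_stop(). */
static int recovery_fn(struct kvm *kvm, uintptr_t data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);	/* periodic per-VM work */

	return 0;
}

static int start_housekeeping(struct kvm *kvm, struct task_struct **out)
{
	int err;

	err = kvm_vm_create_worker_thread(kvm, recovery_fn, 0,
					  "kvm-housekeeping", out);
	if (!err)
		kthread_unpark(*out);	/* worker parks itself until woken */

	return err;
}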